\documentclass[hidelinks,11pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{fullpage}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{array}
\usepackage{color}
\usepackage{graphicx}
\usepackage{float}
\usepackage{hyperref}
\usepackage{listings}
\usepackage[margin=.9in]{geometry}
\usepackage{setspace}
\usepackage{natbib}
\usepackage{proof}
\usepackage{multirow}
\usepackage{hhline}
\usepackage{wrapfig}
\usepackage [english]{babel}
\usepackage{grffile}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage[tikz]{bclogo}
\usetikzlibrary{chains}
\usetikzlibrary{positioning}
\usetikzlibrary{arrows}
\usepackage{lscape}
\usepackage [autostyle, english = american]{csquotes}
\usepackage{enumitem}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{indentfirst}
\usepackage{pdfpages}
\usepackage{booktabs}
% \tabitem: a bullet set with \llap (so it overlaps to the left) padded by two
% non-breaking spaces on each side. Presumably meant for bullet-style entries
% inside table cells, where list environments are awkward -- confirm at use sites.
\newcommand{\tabitem}{~~\llap{\textbullet}~~}
\bibliographystyle{apsr}
\bibpunct{(}{)}{;}{a}{,}{,}
\DeclareGraphicsExtensions{.pdf,.png,.jpg}
\setlength{\tabcolsep}{.18cm}
\usepackage{fancyvrb}
\usepackage{numprint}
\npthousandsep{,}
\usepackage{etoc}
\usepackage{sectsty}
\sectionfont{\fontsize{12}{12}\selectfont}
\subsectionfont{\fontsize{12}{12}\selectfont}

\usepackage{titlesec}

% Restart section, figure, and table numbering each time the part counter is stepped.
% (\@addtoreset is an internal macro, hence \makeatletter/\makeatother.)
\makeatletter
\@addtoreset{section}{part}
\@addtoreset{figure}{part}
\@addtoreset{table}{part}
\makeatother
% titlesec: typeset \part headings in "display" shape, \LARGE bold, with an
% empty label (so no "Part I" line is printed) and 0pt label-to-title separation.
\titleformat{\part}[display]
{\normalfont\LARGE\bfseries}{}{0pt}{}

% Fixed-width, vertically centred (m{<width>}) column types taking the width as
% their argument: L = ragged-right, C = centred, R = ragged-left.
% \let\newline\\ saves the alignment's line break as \newline for use inside a
% cell, \arraybackslash then restores \\ as the row terminator, and
% \hspace{0pt} allows the first word of a cell to be hyphenated.
\newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}

% Table-of-contents entry layout for sections, subsections, and subsubsections.
% \@dottedtocline{<level>}{<indent>}{<width reserved for the number>}
\makeatletter
\renewcommand{\l@section}{\@dottedtocline{1}{1.5em}{2.6em}}
\renewcommand{\l@subsection}{\@dottedtocline{2}{4.0em}{3.6em}}
\renewcommand{\l@subsubsection}{\@dottedtocline{3}{7.4em}{4.5em}}
\makeatother

% ==Cross Referencing Different Docs
\usepackage{xr}
\externaldocument{RefugeesCivilWar_Paper_APSRfinal}

\begin{document}

<<eval = TRUE, echo=FALSE, results='hide', message=FALSE>>= 

# Attach knitr so that opts_chunk is available below.
library(knitr)

# Document-wide chunk defaults. Individual chunk headers may override any of these.
opts_chunk$set(
  # caching and output locations
  cache = TRUE,
  cache.path = 'cache_SI/',
  fig.path = 'figures_SI/',
  # figure device and placement
  dev = 'pdf',
  dpi = 200,
  fig.pos = 't!',
  # code display and diagnostics
  echo = FALSE,
  tidy = TRUE,
  warning = FALSE,
  message = FALSE
)

# Console width and number of significant digits used when printing.
options(width = 110)
options(digits = 1)

@

<<eval=TRUE, echo=FALSE, results='hide', message=FALSE>>= 

# All input files below are read relative to the Paper_Inputs directory.
setwd("Paper_Inputs")

### Load functions and packages
source("RefCivFunctions.R")

# Variable naming (authors' notes):
#   rtb -- the main treatment variable; refers to refugee informal settlements or formal camps
#   rcb -- presence of refugee camps
#   rsb -- presence of refugee settlements

# Multiply the nlights_calib_mean column of `panel` by 100 and return the panel.
rescale_nlights <- function(panel) {
  panel$nlights_calib_mean <- panel$nlights_calib_mean * 100
  panel
}

# Append interaction columns to `panel` and return it. For each pair
# (first[k], second[k]) the product of the two columns is stored in a column
# named "<first>_<second>" (interaction variables for ease of presentation).
# The shorter of `first` / `second` is recycled; columns are added in order.
add_interactions <- function(panel, first, second) {
  n_terms <- max(length(first), length(second))
  first <- rep_len(first, n_terms)
  second <- rep_len(second, n_terms)
  for (k in seq_len(n_terms)) {
    panel[[paste(first[k], second[k], sep = "_")]] <- panel[[first[k]]] * panel[[second[k]]]
  }
  panel
}

##### Load GED 2020 data #####
## Main data
# geddata:     GED panel with all provinces
# geddatasub:  GED panel subset to provinces in countries with rtb
# geddatasub2: main panel subset to provinces in countries with rtb dynamically
geddata <- rescale_nlights(read.csv("panel.full_GED_2020.csv", header = TRUE))
geddatasub <- rescale_nlights(read.csv("panel.subset_GED_2020.csv", header = TRUE))
geddatasub2 <- rescale_nlights(read.csv("panel.dynamic.subset_GED_2020.csv", header = TRUE))

# Own-province presence x other-province presence, for rtb, rcb, and rsb.
own_presence <- c("rtb", "rcb", "rsb")
other_presence <- paste0(own_presence, ".other")
geddata <- add_interactions(geddata, own_presence, other_presence)
geddatasub <- add_interactions(geddatasub, own_presence, other_presence)
geddatasub2 <- add_interactions(geddatasub2, own_presence, other_presence)

# (no_)new_site_rt_* indicators x other-province presence (subset panel only).
geddatasub <- add_interactions(geddatasub,
                               c("new_site_rt_1", "new_site_rt_2",
                                 "no_new_site_rt_1", "no_new_site_rt_2"),
                               "rtb.other")

# rtb x border proximity and x neighboring-conflict indicators.
rtb_moderators <- c("nearborder",
                    "best_neighbors_sum_binary",
                    "best_foreign_neighbors_sum_binary")
geddata <- add_interactions(geddata, "rtb", rtb_moderators)
geddatasub <- add_interactions(geddatasub, "rtb", rtb_moderators)

## rtb placebo data
# plagedrtb:     placebo data for rtb, all provinces
# plagedrtbsub:  placebo rtb data subset to provinces in countries with rtb
# plagedrtbsub2: dynamic-subset version of the rtb placebo panel
plagedrtb <- rescale_nlights(read.csv("panel.placebo.rtb.t1_GED_2020.csv", header = TRUE))
plagedrtbsub <- rescale_nlights(read.csv("panel.placebo.rtb.t1.subset_GED_2020.csv", header = TRUE))
plagedrtbsub2 <- rescale_nlights(read.csv("panel.placebo.dynamic.subset.rtb.t1_GED_2020.csv", header = TRUE))

placebo_moderators <- c("rtb.other", rtb_moderators)
plagedrtb <- add_interactions(plagedrtb, "rtb.placebo", placebo_moderators)
plagedrtbsub <- add_interactions(plagedrtbsub, "rtb.placebo", placebo_moderators)
plagedrtbsub2 <- add_interactions(plagedrtbsub2, "rtb.placebo", "rtb.other")

## rcb placebo data
# plagedrcb:     placebo data for rcb, all provinces
# plagedrcbsub:  placebo rcb data subset to provinces in countries with rcb
# plagedrcbsub2: dynamic-subset version of the rcb placebo panel
plagedrcb <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.rcb.t1_GED_2020.csv", header = TRUE)),
  "rcb.placebo", "rcb.other")
plagedrcbsub <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.rcb.t1.subset_GED_2020.csv", header = TRUE)),
  "rcb.placebo", "rcb.other")
plagedrcbsub2 <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.dynamic.subset.rcb.t1_GED_2020.csv", header = TRUE)),
  "rcb.placebo", "rcb.other")

## rsb placebo data
# plagedrsb:     placebo data for rsb, all provinces
# plagedrsbsub:  placebo rsb data subset to provinces in countries with rsb
# plagedrsbsub2: dynamic-subset version of the rsb placebo panel
plagedrsb <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.rsb.t1_GED_2020.csv", header = TRUE)),
  "rsb.placebo", "rsb.other")
plagedrsbsub <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.rsb.t1.subset_GED_2020.csv", header = TRUE)),
  "rsb.placebo", "rsb.other")
plagedrsbsub2 <- add_interactions(
  rescale_nlights(read.csv("panel.placebo.dynamic.subset.rsb.t1_GED_2020.csv", header = TRUE)),
  "rsb.placebo", "rsb.other")

##### Load Africa population data #####
# gedpopafrica:    subset with refugee population in Africa
# plagedpopafrica: placebo version
gedpopafrica <- read.csv("panel.subset_Africa_population.csv", header = TRUE)
plagedpopafrica <- read.csv("panel.subset_Africa_population.placebo.rtb.t1_GED_2020.csv", header = TRUE)

# Ratio of refugee population to local population.
gedpopafrica$RefPop_Ratio <- gedpopafrica$Total / gedpopafrica$pop

gedpopafrica <- add_interactions(gedpopafrica,
                                 c("large_ref_population", "small_ref_population"),
                                 "rtb.other")
plagedpopafrica <- add_interactions(plagedpopafrica, "rtb.placebo", "rtb.other")

##### Load UNHCR location data #####
# Subset UNHCR data to refugee sites, camps, and settlements open within the study period.
allsites <- read.csv("camps_settlements_processed.csv", header = TRUE) # in our study period

is_camp <- allsites$loc_type == "Refugee Camp"
is_settlement <- allsites$loc_type == "Refugee Settlement"

refsites <- allsites[is_camp | is_settlement, ]
refcamps <- allsites[is_camp, ]
refsettlements <- allsites[is_settlement, ]

@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{\LARGE{Reexamining the Effect of Refugees on Civil Conflict:\\A Global Subnational Analysis\\
Supplementary Information}\footnote{Replication materials are available via Harvard University's Dataverse.}
}


\author{Yang-Yang Zhou\thanks{Assistant Professor, Department of Political Science, University of British Columbia, \href{mailto:yangyang.zhou@ubc.ca}{yangyang.zhou@ubc.ca}, \href{https://www.yangyangzhou.com/}{www.yangyangzhou.com}}
\hspace{.02cm} and Andrew Shaver\thanks{Assistant Professor, Department of Political Science, University of California, Merced. Affiliate, CISAC, Stanford University. Email: \href{mailto:ashaver@ucmerced.edu}{ashaver@ucmerced.edu}, \href{https://www.andrewcshaver.com}{www.andrewcshaver.com}}
}

\date{\today}

%%%%%%%%%%%%%%%%% END OF PREAMBLE %%%%%%%%%%%%%%%%
% Redefine \harvardurl so that it prints "URL: " followed by the address set
% with \url. Presumably \harvardurl is what the bibliography style emits for
% URL fields -- confirm against the .bst/.bbl output.
\renewcommand{\harvardurl}{URL: \url}

\maketitle

% Number supplemental material with an "S" prefix: sections, tables, figures,
% and equations are printed as S1, S2, ... (arabic counter values).
\renewcommand{\thesection}{S\arabic{section}}   
\renewcommand{\thetable}{S\arabic{table}}   
\renewcommand{\thefigure}{S\arabic{figure}}
\renewcommand{\theequation}{S\arabic{equation}}


\newpage
\part{Supplementary Information}
\label{SIsec:appendix}

\addtocontents{toc}{\protect\setcounter{tocdepth}{2}}
\tableofcontents

\setstretch{1.2}


%%%%% SUMMARY STATISTICS AND DESCRIPTIVES
\clearpage
\newpage
\section{Summary Statistics}
\label{SIsec:summary}

This section shows descriptive statistics for the variables we use in the analyses.

<<sumstats_sub, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=TRUE, message=TRUE, strip.white=TRUE, results='asis'>>=

# Summary-statistics table for the subset panel (geddatasub).
#
# Each element maps a column of the panel (element name) to the row label
# printed in the table (element value). Keeping the two together in one vector
# prevents the variable list and the label list from drifting out of alignment.
sumvars <- c(onset.n = "Conflict Onset (until 2008)",
             incidence = "Conflict Incidence (until 2008)",
             attack = "Violent Events",
             attack_state_based = "Violent Events State Based",
             attack_non_state = "Violent Events Non-State",
             attack_one_sided = "Violent Events One Sided",
             best = "Battle Deaths",
             best_state_based = "Battle Deaths State Based",
             best_non_state = "Battle Deaths Non-State",
             best_one_sided = "Battle Deaths One Sided",
             nlights_calib_mean = "Nighttime Lights Mean",
             rtb = "Refugee Presence",
             rtb.other = "Other Province Refugee Presence",
             rcb = "Refugee Camps",
             rcb.other = "Other Province Refugee Camps",
             rsb = "Refugee Settlements",
             rsb.other = "Other Province Refugee Settlements",
             attack_neighbors_sum = "Neighboring Violent Events",
             log_pop_1 = "Lagged Population (logged)",
             gcp_ppp_1 = "Lagged GDP",
             excluded_mean = "Excluded Ethnic Groups Mean",
             STD = "Terrain Ruggedness",
             SQKM_ADMIN = "Province size (sqkm)",
             log_bdist2 = "Distance from border (km logged)",
             log_capdist = "Distance from capital (km logged)",
             idpb = "IDP Presence")

sumdata <- geddatasub[, names(sumvars)]
colnames(sumdata) <- unname(sumvars)

# Capture the LaTeX emitted by stargazer (one element per output line) so that
# it can be post-processed before being written into the document.
sumtable <- capture.output({stargazer(sumdata,
                                      header = FALSE,
                                      digits = 2,
                                      digits.extra = 3,
                                      summary.stat = c("n", "mean", "sd", "min", "median", "max"),
                                      summary.logical = TRUE,
                                      label = "tab:sumstats")})

# Shrink the tabular to 95% of the text width and fill in the (empty) caption.
sumtable <- gsub("\\begin{tabular}", "\\resizebox{.95\\textwidth}{!}{\\begin{tabular}", sumtable, fixed = TRUE)
sumtable <- gsub("\\end{tabular}", "\\end{tabular}}", sumtable, fixed = TRUE)
sumtable <- gsub("\\caption{}", "\\caption{Summary Statistics for Main Data of Only Countries that have Hosted Refugees}", sumtable, fixed = TRUE)

# Write one captured line per output line. cat()'s default separator is a
# space, which would collapse the whole table onto a single physical line.
cat(sumtable, sep = "\n")

@

<<sumstats_full, eval = FALSE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=TRUE, message=TRUE, strip.white=TRUE, results='asis'>>=

# Summary-statistics table for the full panel (geddata). This chunk is
# currently disabled (eval = FALSE in the chunk header).
#
# Each element maps a column of the panel (element name) to the row label
# printed in the table (element value).
sumvars_full <- c(onset.n = "Conflict Onset (until 2008)",
                  incidence = "Conflict Incidence (until 2008)",
                  attack = "Violent Events",
                  attack_state_based = "Violent Events State Based",
                  attack_non_state = "Violent Events Non-State",
                  attack_one_sided = "Violent Events One Sided",
                  best = "Battle Deaths",
                  best_state_based = "Battle Deaths State Based",
                  best_non_state = "Battle Deaths Non-State",
                  best_one_sided = "Battle Deaths One Sided",
                  nlights_calib_mean = "Nighttime Lights Mean",
                  rtb = "Refugee Presence",
                  rtb.other = "Other Province Refugee Presence",
                  rcb = "Refugee Camps",
                  rcb.other = "Other Province Refugee Camps",
                  rsb = "Refugee Settlements",
                  rsb.other = "Other Province Refugee Settlements",
                  attack_neighbors_sum = "Neighboring Violent Events",
                  log_pop_1 = "Lagged Population (logged)",
                  gcp_ppp_1 = "Lagged GDP",
                  excluded_mean = "Excluded Ethnic Groups Mean",
                  STD = "Terrain Ruggedness",
                  SQKM_ADMIN = "Province size (sqkm)",
                  log_bdist2 = "Distance from border (km logged)",
                  log_capdist = "Distance from capital (km logged)",
                  idpb = "IDP Presence")

sumdata <- geddata[, names(sumvars_full)]
colnames(sumdata) <- unname(sumvars_full)

# Capture the LaTeX emitted by stargazer (one element per output line).
# The label is distinct from the one used by the sumstats_sub chunk so that
# enabling this chunk does not create a multiply-defined label.
sumtable <- capture.output({stargazer(sumdata,
                                      header = FALSE,
                                      digits = 2,
                                      digits.extra = 3,
                                      summary.stat = c("n", "mean", "sd", "min", "median", "max"),
                                      summary.logical = TRUE,
                                      label = "tab:sumstats_full")})

# Shrink the tabular to 95% of the text width and fill in the (empty) caption.
sumtable <- gsub("\\begin{tabular}", "\\resizebox{.95\\textwidth}{!}{\\begin{tabular}", sumtable, fixed = TRUE)
sumtable <- gsub("\\end{tabular}", "\\end{tabular}}", sumtable, fixed = TRUE)
sumtable <- gsub("\\caption{}", "\\caption{Summary Statistics for Full Data of All Countries}", sumtable, fixed = TRUE)

# Write one captured line per output line. cat()'s default separator is a
# space, which would collapse the whole table onto a single physical line.
cat(sumtable, sep = "\n")

@

\newpage
Table~\ref{tab:refsitesnumprovyr} shows the number of province-year observations by number of refugee sites in the data (e.g., 26,237 province-years have no refugee sites, 1,421 province-years have 1 refugee site, etc.). Next, Table~\ref{tab:refsitesnumregion} breaks down the number of refugee camps, settlements, and sites (both camps and settlements) by region. 

<<refsitesnumprovyr, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# Frequency table: number of province-years (geddatasub) for each observed
# number of refugee sites (column rt).
rt <- data.frame(table(geddatasub$rt))

colnames(rt) <- c("Refugee Sites", "Province-Years")

# Lay the long two-column table out as side-by-side panels. The panel length
# is derived from the number of rows so that no site count is silently dropped
# if the data change; with 39 rows this gives rows 1-13, 14-26, and 27-39.
# Row indices past the end of rt produce NA rows, which pad the last panel.
n_panels <- 3
rows_per_panel <- ceiling(nrow(rt) / n_panels)

rt_panels <- lapply(seq_len(n_panels), function(p) {
  rt[seq.int((p - 1) * rows_per_panel + 1, p * rows_per_panel), ]
})

# One alignment entry for the (suppressed) row names, then "rl" per panel,
# with panels separated by vertical rules: "rrl|rl|rl" for three panels.
panel_align <- paste0("r", paste(rep("rl", n_panels), collapse = "|"))

print(xtable(do.call(cbind, rt_panels),
             caption = "Frequency of Province-Years by Number of Refugee Sites",
             label = "tab:refsitesnumprovyr",
             digits = 0,
             align = panel_align
             ),
     include.rownames = FALSE,
     scalebox = .95)

@

<<refsitesnumregion, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# Count the rows of `sites` per REGION and return a one-row wide table:
# a `type` column holding `site_label`, followed by one count column per region.
# `!!` injects the value of site_label so that it cannot be confused with a
# column of `sites`.
count_sites_by_region <- function(sites, site_label) {
  sites %>%
    group_by(REGION) %>%
    dplyr::count(name = "n_sites") %>%
    mutate(type = !!site_label) %>%
    pivot_wider(names_from = REGION, values_from = n_sites)
}

# One row per site type. Regions absent for a given type come out of
# bind_rows() as NA and are set to 0.
refsitesregion <- bind_rows(
  count_sites_by_region(refcamps, "Refugee Camps"),
  count_sites_by_region(refsettlements, "Refugee Settlements"),
  count_sites_by_region(refsites, "Refugee Sites")
) %>%
  mutate(across(-type, ~coalesce(.x, 0))) %>%
  column_to_rownames(var = "type")

# Transpose so that regions are rows and site types are columns.
print(xtable(t(refsitesregion),
             caption = "Number of Refugee Camps, Settlements, and Sites (Camps + Settlements) per Region",
             label = "tab:refsitesnumregion",
             digits = 0
             ),
     NA.string = "0",
     include.rownames = TRUE,
     scalebox = .95)

@

\newpage
Figure \ref{fig:Conflict_time_sub} shows the main conflict outcomes across provinces over time. Because of the spike in battle deaths in 1994 due to the Rwandan genocide, we log this outcome in our analyses. 

<<Conflict_time_sub, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 8, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap="This figure shows the propensity of conflict onset and incidence (1990--2008) across time in plots 1 and 2; and the number of violent events and battle deaths (1990--2018), across time in plots 3 and 4, using the main data.">>=

# Four time-series panels for the subset data: proportion of provinces with
# conflict onset, proportion with conflict incidence, total violent events,
# and total battle deaths (log scale), each by year.

# Column-wise (per-year) proportions of each value of `outcome`, using only
# rows of `panel` with year < 2009 (onset and incidence only go to 2008).
# Returns a table with one row per outcome value and one column per year.
yearly_prop <- function(panel, outcome) {
  window <- panel[panel$year < 2009, ]
  prop.table(table(window[[outcome]], window$year), 2)
}

# Turn a table returned by yearly_prop() into a plotting data frame with one
# row per year: a numeric column per outcome value (named as data.frame()
# would name them, e.g. "X0" and "X1"), then `year` and `group`. Years are
# taken from the table itself rather than hard-coded, so a missing or extra
# year cannot shift the labels.
prop_plot_data <- function(prop, group) {
  shares <- as.data.frame(unclass(t(prop)))
  names(shares) <- make.names(names(shares))
  shares$year <- as.numeric(rownames(shares))
  shares$group <- group
  shares
}

# Shared look for all four panels: blank background, thin gray border, no legend.
# "none" is the documented value for suppressing the legend.
# NOTE(review): `size` in element_rect() is deprecated in favour of `linewidth`
# in recent ggplot2 releases; kept here for compatibility with older versions.
conflict_panel_theme <- theme(panel.background = element_blank(),
                              panel.border = element_rect(colour = "gray", fill = NA, size = .8),
                              legend.position = "none")

# Onset, only goes to 2008
onsetprop <- yearly_prop(geddata, "onset.n")       # full sample (computed but not plotted)
onsetpropsub <- yearly_prop(geddatasub, "onset.n") # subset sample
onsetpropplot <- prop_plot_data(onsetpropsub, "Subset provinces")

onsetplot <- ggplot(onsetpropplot,
                    aes(x = year, y = X1)) +
  geom_line() +
  ylim(0, .4) +
  ylab("Proportion of Onset\n across Provinces") +
  xlab("Year") +
  conflict_panel_theme

# Incidence, only goes to 2008
incidenceprop <- yearly_prop(geddata, "incidence")       # full sample (computed but not plotted)
incidencepropsub <- yearly_prop(geddatasub, "incidence") # subset sample
incidencepropplot <- prop_plot_data(incidencepropsub, "Subset provinces")

incidenceplot <- ggplot(incidencepropplot,
                        aes(x = year, y = X1)) +
  geom_line() +
  ylim(0, .4) +
  ylab("Proportion of Incidence\n across Provinces") +
  xlab("Year") +
  conflict_panel_theme

# Violent Events: yearly total across provinces
attacksumsub <- geddatasub %>%
  group_by(year) %>%
  summarize(sum_attack = sum(attack, na.rm = TRUE))

attackplot <- ggplot(attacksumsub,
                     aes(x = year, y = sum_attack)) +
  geom_line() +
  ylab("Total Number of Violent Events") +
  xlab("Year") +
  conflict_panel_theme

# Battle deaths: yearly total across provinces, drawn on a log10 axis
bestsumsub <- geddatasub %>%
  group_by(year) %>%
  summarize(sum_best = sum(best, na.rm = TRUE))

bestplot <- ggplot(bestsumsub,
                   aes(x = year, y = sum_best)) +
  geom_line() +
  ylab("Total Number of Battle Deaths (log scale)") +
  xlab("Year") +
  scale_y_log10(labels = scales::comma) +
  conflict_panel_theme

# Arrange as a 2 x 2 grid: onset and incidence on top, events and deaths below.
(onsetplot + incidenceplot) /
(attackplot + bestplot)

@

%%%%%% ADDITIONAL DATA DESCRIPTION
\clearpage
\newpage
\section{Data Sources}
\label{SIsec:datadescript}

The UNHCR georeferenced refugee locations dataset that we introduce in this paper can be accessed in our replication files. Specifically, the raw file from the UNHCR is \verb|wrl_ppl_poc_07_01_2020.xls|, and the data that we additionally processed (see description in the manuscript) for use in the main analysis is \verb|camps_settlements_processed.csv|.

Although we include our data cleaning code for transparency, we do not include within our replication files the following data that we use as inputs into the cleaning code, because they are either proprietary or already available publicly online. We detail how to find these data below. 

\subsection*{UCDP GED}

The Uppsala Conflict Data Program's (UCDP) Georeferenced Event Dataset (GED) (version 19.1) \citep{Sundberg:2013} can be accessed at:\\

\href{https://ucdp.uu.se/downloads/olddw.html}{ucdp.uu.se/downloads/olddw.html}\\

UCDP's GED separate Syria dataset (version 652.1601.1911) can be accessed at:\\

\href{https://ucdp.uu.se/downloads/olddw.html}{ucdp.uu.se/downloads/olddw.html}

\subsection*{PRIO Conflict Sites}

The Peace Research Institute Oslo (PRIO) Conflict Site (1989-2008) data (version 3.0) \citep{Dittrich:2012} can be accessed at:\\

\href{https://www.prio.org/Data/Armed-Conflict/Conflict-Site/}{prio.org/Data/Armed-Conflict/Conflict-Site/}

\subsection*{wzoneData}

The wzoneData: Zones of Armed Conflicts data (corresponding to UCDP GED version 19.1) developed by \cite{Kikuta:2020} are available at:\\

\href{https://dataverse.harvard.edu/dataverse/kyosuke_kkt}{dataverse.harvard.edu/dataverse/kyosuke\_kkt}

\subsection*{CShapes Shapefile}

The CShapes shapefile (version 0.6) \citep{weidmann2010geography} can be accessed at: \\

\href{http://nils.weidmann.ws/projects/cshapes/shapefile.html}{nils.weidmann.ws/projects/cshapes/shapefile.html}

\subsection*{Administrative Unit Shapefile}

The 1998 1st level administrative (``province'') boundaries used in this project can be procured from ESRI: \\

\href{https://www.esri.com/en-us/home}{www.esri.com}

\subsection*{Universal Transverse Mercator Shapefile}

The global Universal Transverse Mercator grid shapefile (updated as of 11/11/2018) can be accessed at: \\

\href{http://hub.arcgis.com/datasets/esri::world-utm-grid}{hub.arcgis.com/datasets/esri::world-utm-grid}

\subsection*{PRIO-GRID}

PRIO-GRID \citep{Tollefsen:2012} data (version 2.0) can be accessed at:\\

\href{https://grid.prio.org/#/download}{grid.prio.org/download}\\

The PRIO-GRID polygon (cell) shapefile can be accessed at: \\

\href{https://grid.prio.org/#/extensions}{grid.prio.org/extensions}\\

The PRIO-GRID centroids shapefile no longer appears on the PRIO-GRID website. Individuals interested in obtaining these data might reach out to the following contacts listed on the PRIO-GRID website: andreas@prio.org; marteg@prio.org.

\subsection*{Terrain Ruggedness}

Data on terrain ruggedness (original version) from \cite{Shaver:2016} can be accessed at:\\

\url{https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/WXUZBN}



%%%%% CORRELATION PLOT
\clearpage
\newpage
\section{Correlation Coefficient Plots}
\label{SIsec:corr}

Figure \ref{fig:Corrplot} shows the correlation coefficient plot between each pair of explanatory and control variables used in the main analysis. The correlation coefficients are all well under .9, which is the threshold at which multicollinearity is certain \citep{Dohoo:1997}. Thus, we are not concerned with multicollinearity.

<<Corrplot, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 8, fig.height = 8, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("These plots show the correlation coefficient between each pair of explanatory and control variables used in the main analysis.")>>=

# Pairwise correlations among the explanatory and control variables of the
# subset panel. Element names are panel columns; element values are the axis
# labels shown in the plot.
corrvars <- c(rtb = "Refugee Presence",
              rtb.other = "Other Province Refugee Presence",
              attack_neighbors_sum = "Neighboring Violent Events",
              log_pop_1 = "Lagged Population (logged)",
              gcp_ppp_1 = "Lagged GDP",
              excluded_mean = "Excluded Ethnic Groups Mean",
              STD = "Terrain ruggedness",
              SQKM_ADMIN = "Province size (sqkm)",
              log_bdist2 = "Distance from border (km logged)",
              log_capdist = "Distance from capital (km logged)",
              idpb = "IDP Presence")

corrdata <- geddatasub[, names(corrvars)]

# Correlations over rows with no missing values, rounded to one decimal place.
corrmat <- cor(corrdata, use = "complete.obs")
corrmat <- round(corrmat, 1)

# Use the readable labels on both axes.
dimnames(corrmat) <- list(unname(corrvars), unname(corrvars))

# plot: lower triangle only, variables in the order given, coefficients printed
corrplot <- ggcorrplot(corrmat,
                       hc.order = FALSE,
                       type = "lower",
                       lab = TRUE)
corrplot

@

%%%%% MAIN ANALYSIS RERUN HERE
<<MainOnsetModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# These are the models we run, save as rdata, and bring in output for plotting:
# If perfect separation, because too many countries with no conflict in the full dataset, uninformitive priors are chosen based on the stan-dev Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

## Treatment model 1, with full data
onset.h1.full <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata[geddata$year < 2009,],
               prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h1.full)

## Placebo model
onset.h1.full.pla <- bayesglm(onset.n ~ rtb.placebo + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtb[plagedrtb$year < 2009,],
               prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(onset.h1.full.pla)

## H1 onset models on the subset sample: treatment fit, then its placebo fit.
## Both use observations with year < 2009. The prior.scale / prior.df arguments
## are commented out, so bayesglm() uses its default prior here.
## NOTE(review): other full-data fits in this file pass prior.scale = 1,
## prior.df = 3 -- confirm the difference is intended.

# Treatment: actual refugee presence (rtb), subset data
onset.h1.sub <- bayesglm(
  onset.n ~ rtb + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub)

# Placebo: rtb.placebo in place of rtb, fitted on plagedrtbsub
onset.h1.sub.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub.pla)

# Plot data for the H1 onset models.
# Step 1: run pred.bi() on each fit. The calls are kept in the order
# full, full placebo, subset, subset placebo in case the helper draws
# random numbers.
onset.h1.full.dplot <- pred.bi(onset.h1.full)
onset.h1.full.pla.dplot <- pred.bi(onset.h1.full.pla)
onset.h1.sub.dplot <- pred.bi(onset.h1.sub)
onset.h1.sub.pla.dplot <- pred.bi(onset.h1.sub.pla)

# Step 2: label each set of estimates by type
onset.h1.full.dplot$Group <- "Actual presence"
onset.h1.full.pla.dplot$Group <- "Placebo presence"
onset.h1.sub.dplot$Group <- "Actual presence"
onset.h1.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.onset.h1.full <- rbind(onset.h1.full.dplot, onset.h1.full.pla.dplot)
plot.onset.h1.sub <- rbind(onset.h1.sub.dplot, onset.h1.sub.pla.dplot)

# # save output as Rdata files
# save(onset.h1.full, file = "onset.h1.full.Rdata")
# save(onset.h1.full.pla, file = "onset.h1.full.pla.Rdata")
# save(plot.onset.h1.full, file = "plot.onset.h1.full.Rdata")
# save(onset.h1.sub, file = "onset.h1.sub.Rdata")
# save(onset.h1.sub.pla, file = "onset.h1.sub.pla.Rdata")
# save(plot.onset.h1.sub, file = "plot.onset.h1.sub.Rdata")

@

<<MainIncidenceModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 incidence models: logit fits via bayesglm(), all on years before 2009.
## Four fits: {full, subset} data x {treatment (rtb), placebo (rtb.placebo)}.
## The full-data fits set prior.scale = 1, prior.df = 3; in the subset fits
## those arguments are commented out, so bayesglm()'s defaults apply.
## NOTE(review): confirm the prior difference is intended.

# Treatment, full data
incidence.h1.full <- bayesglm(
  incidence ~ rtb + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.full)

# Placebo, full data
incidence.h1.full.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb[plagedrtb$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.full.pla)

# Treatment, subset data
incidence.h1.sub <- bayesglm(
  incidence ~ rtb + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub)

# Placebo, subset data
incidence.h1.sub.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub.pla)

# Plot data for the H1 incidence models.
# Step 1: run pred.bi() on each fit. The calls are kept in the order
# full, full placebo, subset, subset placebo in case the helper draws
# random numbers.
incidence.h1.full.dplot <- pred.bi(incidence.h1.full)
incidence.h1.full.pla.dplot <- pred.bi(incidence.h1.full.pla)
incidence.h1.sub.dplot <- pred.bi(incidence.h1.sub)
incidence.h1.sub.pla.dplot <- pred.bi(incidence.h1.sub.pla)

# Step 2: label each set of estimates by type
incidence.h1.full.dplot$Group <- "Actual presence"
incidence.h1.full.pla.dplot$Group <- "Placebo presence"
incidence.h1.sub.dplot$Group <- "Actual presence"
incidence.h1.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.incidence.h1.full <- rbind(incidence.h1.full.dplot,
                                incidence.h1.full.pla.dplot)
plot.incidence.h1.sub <- rbind(incidence.h1.sub.dplot,
                               incidence.h1.sub.pla.dplot)

# # save output as Rdata files
# save(incidence.h1.full, file = "incidence.h1.full.Rdata")
# save(incidence.h1.full.pla, file = "incidence.h1.full.pla.Rdata")
# save(plot.incidence.h1.full, file = "plot.incidence.h1.full.Rdata")
# save(incidence.h1.sub, file = "incidence.h1.sub.Rdata")
# save(incidence.h1.sub.pla, file = "incidence.h1.sub.pla.Rdata")
# save(plot.incidence.h1.sub, file = "plot.incidence.h1.sub.Rdata")

@

<<MainAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 models for the attack outcome, fitted with lm_robust().
## Four fits: {full, subset} data x {treatment (rtb), placebo (rtb.placebo)}.
## No year restriction is applied to the data in these calls.

# Treatment, full data
attack.h1.full <- lm_robust(
  attack ~ rtb + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

# summary(attack.h1.full)

# Placebo, full data
attack.h1.full.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

# summary(attack.h1.full.pla)

# Treatment, subset data
attack.h1.sub <- lm_robust(
  attack ~ rtb + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h1.sub)

# Placebo, subset data
attack.h1.sub.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack.h1.sub.pla)


# Pull the coefficient row for the presence term out of each H1 attack model
# (these are lm_robust coefficient estimates, not predicted probabilities).
# Each model is tidied once and filtered on the tidied frame's own `term`
# column, so row selection does not rely on the model object's internal
# `$term` vector lining up with the rows that tidy() returns.

# Full data: treatment term
attack.h1.full.dplot <- tidy(attack.h1.full)
attack.h1.full.dplot <- attack.h1.full.dplot[attack.h1.full.dplot$term == "rtb", ]
attack.h1.full.dplot$Group <- "Actual presence" # label the type of estimate

# Full data: placebo term
attack.h1.full.pla.dplot <- tidy(attack.h1.full.pla)
attack.h1.full.pla.dplot <- attack.h1.full.pla.dplot[attack.h1.full.pla.dplot$term == "rtb.placebo", ]
attack.h1.full.pla.dplot$Group <- "Placebo presence"

# Stack treatment and placebo rows for ggplot
plot.attack.h1.full <- rbind(attack.h1.full.dplot,
                             attack.h1.full.pla.dplot)

# Subset data: treatment term
attack.h1.sub.dplot <- tidy(attack.h1.sub)
attack.h1.sub.dplot <- attack.h1.sub.dplot[attack.h1.sub.dplot$term == "rtb", ]
attack.h1.sub.dplot$Group <- "Actual presence" # label the type of estimate

# Subset data: placebo term
attack.h1.sub.pla.dplot <- tidy(attack.h1.sub.pla)
attack.h1.sub.pla.dplot <- attack.h1.sub.pla.dplot[attack.h1.sub.pla.dplot$term == "rtb.placebo", ]
attack.h1.sub.pla.dplot$Group <- "Placebo presence"

# Stack treatment and placebo rows for ggplot
plot.attack.h1.sub <- rbind(attack.h1.sub.dplot,
                            attack.h1.sub.pla.dplot)

@

<<MainBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 models for logged battle deaths, log(best + 1), fitted with lm_robust().
## The lagged outcome enters as log(best_1 + 1).
## Four fits: {full, subset} data x {treatment (rtb), placebo (rtb.placebo)}.

# Treatment, full data
best.h1.full <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

# summary(best.h1.full)

# Placebo, full data
best.h1.full.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

# summary(best.h1.full.pla)

# Treatment, subset data
best.h1.sub <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(best.h1.sub)

# Placebo, subset data
best.h1.sub.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(best.h1.sub.pla)


# Pull the coefficient row for the presence term out of each H1 battle-death
# model (these are lm_robust coefficient estimates, not predicted
# probabilities). Each model is tidied once and filtered on the tidied
# frame's own `term` column, so row selection does not rely on the model
# object's internal `$term` vector lining up with the rows tidy() returns.

# Full data: treatment term
best.h1.full.dplot <- tidy(best.h1.full)
best.h1.full.dplot <- best.h1.full.dplot[best.h1.full.dplot$term == "rtb", ]
best.h1.full.dplot$Group <- "Actual presence" # label the type of estimate

# Full data: placebo term
best.h1.full.pla.dplot <- tidy(best.h1.full.pla)
best.h1.full.pla.dplot <- best.h1.full.pla.dplot[best.h1.full.pla.dplot$term == "rtb.placebo", ]
best.h1.full.pla.dplot$Group <- "Placebo presence" # label the type of estimate

# Stack treatment and placebo rows for ggplot
plot.best.h1.full <- rbind(best.h1.full.dplot,
                           best.h1.full.pla.dplot)

# Subset data: treatment term
best.h1.sub.dplot <- tidy(best.h1.sub)
best.h1.sub.dplot <- best.h1.sub.dplot[best.h1.sub.dplot$term == "rtb", ]
best.h1.sub.dplot$Group <- "Actual presence" # label the type of estimate

# Subset data: placebo term
best.h1.sub.pla.dplot <- tidy(best.h1.sub.pla)
best.h1.sub.pla.dplot <- best.h1.sub.pla.dplot[best.h1.sub.pla.dplot$term == "rtb.placebo", ]
best.h1.sub.pla.dplot$Group <- "Placebo presence" # label the type of estimate

# Stack treatment and placebo rows for ggplot
plot.best.h1.sub <- rbind(best.h1.sub.dplot,
                          best.h1.sub.pla.dplot)

@

<<MainOnsetModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 onset models: the H1 specification plus an interaction column
## (rtb_rtb.other for treatment fits, rtb.placebo_rtb.other for placebo fits).
## Logit fits via bayesglm(), all on years before 2009.
## The full-data fits set prior.scale = 1, prior.df = 3; in the subset fits
## those arguments are commented out, so bayesglm()'s defaults apply.
## NOTE(review): confirm the prior difference is intended.

# Treatment, full data
onset.h2.full <- bayesglm(
  onset.n ~ rtb + rtb.other + rtb_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.full)

# Placebo, full data
onset.h2.full.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb[plagedrtb$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.full.pla)

# Treatment, subset data
onset.h2.sub <- bayesglm(
  onset.n ~ rtb + rtb.other + rtb_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub)

# Placebo, subset data
onset.h2.sub.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub.pla)

# Plot data for the H2 onset models (specifications with an interaction term).
# Step 1: run pred.bi.int() on each fit. The calls are kept in the order
# full, full placebo, subset, subset placebo in case the helper draws
# random numbers.
onset.h2.full.dplot <- pred.bi.int(onset.h2.full)
onset.h2.full.pla.dplot <- pred.bi.int(onset.h2.full.pla)
onset.h2.sub.dplot <- pred.bi.int(onset.h2.sub)
onset.h2.sub.pla.dplot <- pred.bi.int(onset.h2.sub.pla)

# Step 2: label each set of estimates by type
onset.h2.full.dplot$Group <- "Actual presence"
onset.h2.full.pla.dplot$Group <- "Placebo presence"
onset.h2.sub.dplot$Group <- "Actual presence"
onset.h2.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.onset.h2.full <- rbind(onset.h2.full.dplot, onset.h2.full.pla.dplot)
plot.onset.h2.sub <- rbind(onset.h2.sub.dplot, onset.h2.sub.pla.dplot)

# save output as Rdata files
# save(onset.h2.full, file = "onset.h2.full.Rdata")
# save(onset.h2.full.pla, file = "onset.h2.full.pla.Rdata")
# save(plot.onset.h2.full, file = "plot.onset.h2.full.Rdata")
# save(onset.h2.sub, file = "onset.h2.sub.Rdata")
# save(onset.h2.sub.pla, file = "onset.h2.sub.pla.Rdata")
# save(plot.onset.h2.sub, file = "plot.onset.h2.sub.Rdata")

@

<<MainIncidenceModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 incidence models: the H1 specification plus an interaction column
## (rtb_rtb.other for treatment fits, rtb.placebo_rtb.other for placebo fits).
## Logit fits via bayesglm(), all on years before 2009.
## The full-data fits set prior.scale = 1, prior.df = 3; in the subset fits
## those arguments are commented out, so bayesglm()'s defaults apply.
## NOTE(review): confirm the prior difference is intended.

# Treatment, full data
incidence.h2.full <- bayesglm(
  incidence ~ rtb + rtb.other + rtb_rtb.other + incidence_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h2.full)

# Placebo, full data
incidence.h2.full.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + incidence_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb[plagedrtb$year < 2009, ],
  prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h2.full.pla)

# Treatment, subset data
incidence.h2.sub <- bayesglm(
  incidence ~ rtb + rtb.other + rtb_rtb.other + incidence_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h2.sub)

# Placebo, subset data
incidence.h2.sub.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + incidence_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub[plagedrtbsub$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h2.sub.pla)

# Plot data for the H2 incidence models (specifications with an interaction
# term).
# Step 1: run pred.bi.int() on each fit. The calls are kept in the order
# full, full placebo, subset, subset placebo in case the helper draws
# random numbers.
incidence.h2.full.dplot <- pred.bi.int(incidence.h2.full)
incidence.h2.full.pla.dplot <- pred.bi.int(incidence.h2.full.pla)
incidence.h2.sub.dplot <- pred.bi.int(incidence.h2.sub)
incidence.h2.sub.pla.dplot <- pred.bi.int(incidence.h2.sub.pla)

# Step 2: label each set of estimates by type
incidence.h2.full.dplot$Group <- "Actual presence"
incidence.h2.full.pla.dplot$Group <- "Placebo presence"
incidence.h2.sub.dplot$Group <- "Actual presence"
incidence.h2.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.incidence.h2.full <- rbind(incidence.h2.full.dplot,
                                incidence.h2.full.pla.dplot)
plot.incidence.h2.sub <- rbind(incidence.h2.sub.dplot,
                               incidence.h2.sub.pla.dplot)

# # save output as Rdata files
# save(incidence.h2.full, file = "incidence.h2.full.Rdata")
# save(incidence.h2.full.pla, file = "incidence.h2.full.pla.Rdata")
# save(plot.incidence.h2.full, file = "plot.incidence.h2.full.Rdata")
# save(incidence.h2.sub, file = "incidence.h2.sub.Rdata")
# save(incidence.h2.sub.pla, file = "incidence.h2.sub.pla.Rdata")
# save(plot.incidence.h2.sub, file = "plot.incidence.h2.sub.Rdata")

@

<<MainAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the attack outcome, fitted with lm_robust(): the H1
## specification plus an interaction column (rtb_rtb.other for treatment fits,
## rtb.placebo_rtb.other for placebo fits).
## Four fits: {full, subset} data x {treatment, placebo}.

# Treatment, full data
attack.h2.full <- lm_robust(
  attack ~ rtb + rtb.other + rtb_rtb.other + attack_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

# summary(attack.h2.full)

# Placebo, full data
attack.h2.full.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + attack_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

# summary(attack.h2.full.pla)

# Treatment, subset data
attack.h2.sub <- lm_robust(
  attack ~ rtb + rtb.other + rtb_rtb.other + attack_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack.h2.sub)

# Placebo, subset data
attack.h2.sub.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + attack_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack.h2.sub.pla)


# Plot data for the H2 attack models.
# Step 1: pass each lm_robust fit to pred.lm.int.r() (helper not shown in this
# part of the file). Call order is kept as full, full placebo, subset,
# subset placebo.
attack.h2.full.dplot <- pred.lm.int.r(attack.h2.full)
attack.h2.full.pla.dplot <- pred.lm.int.r(attack.h2.full.pla)
attack.h2.sub.dplot <- pred.lm.int.r(attack.h2.sub)
attack.h2.sub.pla.dplot <- pred.lm.int.r(attack.h2.sub.pla)

# Step 2: label each set of estimates by type
attack.h2.full.dplot$Group <- "Actual presence"
attack.h2.full.pla.dplot$Group <- "Placebo presence"
attack.h2.sub.dplot$Group <- "Actual presence"
attack.h2.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.attack.h2.full <- rbind(attack.h2.full.dplot, attack.h2.full.pla.dplot)
plot.attack.h2.sub <- rbind(attack.h2.sub.dplot, attack.h2.sub.pla.dplot)
@

<<MainBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for logged battle deaths, log(best + 1), fitted with lm_robust():
## the H1 specification plus an interaction column (rtb_rtb.other for
## treatment fits, rtb.placebo_rtb.other for placebo fits).
## Four fits: {full, subset} data x {treatment, placebo}.

# Treatment, full data
best.h2.full <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + rtb_rtb.other + log(best_1 + 1) +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

# summary(best.h2.full)

# Placebo, full data
best.h2.full.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + log(best_1 + 1) +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

# summary(best.h2.full.pla)

# Treatment, subset data
best.h2.sub <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + rtb_rtb.other + log(best_1 + 1) +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(best.h2.sub)

# Placebo, subset data
best.h2.sub.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + log(best_1 + 1) +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(best.h2.sub.pla)


# Plot data for the H2 battle-death models.
# Step 1: pass each lm_robust fit to pred.lm.int.r() (helper not shown in this
# part of the file). Call order is kept as full, full placebo, subset,
# subset placebo.
best.h2.full.dplot <- pred.lm.int.r(best.h2.full)
best.h2.full.pla.dplot <- pred.lm.int.r(best.h2.full.pla)
best.h2.sub.dplot <- pred.lm.int.r(best.h2.sub)
best.h2.sub.pla.dplot <- pred.lm.int.r(best.h2.sub.pla)

# Step 2: label each set of estimates by type
best.h2.full.dplot$Group <- "Actual presence"
best.h2.full.pla.dplot$Group <- "Placebo presence"
best.h2.sub.dplot$Group <- "Actual presence"
best.h2.sub.pla.dplot$Group <- "Placebo presence"

# Step 3: stack treatment and placebo rows for ggplot
plot.best.h2.full <- rbind(best.h2.full.dplot, best.h2.full.pla.dplot)
plot.best.h2.sub <- rbind(best.h2.sub.dplot, best.h2.sub.pla.dplot)

@


%%%%% ANALYSIS OF OTHER CONFLICT TYPES
\newpage
\section{Analysis of Other Conflict Types}
\label{SIsec:conflicttypes}

This section shows the main (H1) and secondary (H2) analyses on other conflict types included in the UCDP GED data: state-based, non-state, and one-sided violent events and battle deaths. Consistent with the analyses in the paper, Figure~\ref{fig:GEDtypes_ModelsH1_sub} shows that there is no effect of refugee presence on the various types of conflict, and Figure~\ref{fig:GEDtypes_ModelsH2_sub} shows that there are negative effects of concentrated refugee presence on several of the conflict types---state-based and one-sided violent events and state-based battle deaths---and null effects for dispersed refugee presence. This is also consistent with our theorized predictions.

Note that two placebo test coefficients are positive, implying that provinces that will host refugee sites in the future may be experiencing more conflict in the present. This runs counter to the selection concern that refugees are able to select into provinces with less conflict.

<<MainStateAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 models for state-based attacks, fitted with lm_robust().
## Lagged controls are attack_state_based_1 and attack_non_state_1.
## Four fits: {full, subset} data x {treatment (rtb), placebo (rtb.placebo)}.

# Treatment, full data
attack_state_based.h1.full <- lm_robust(
  attack_state_based ~ rtb + rtb.other +
    attack_state_based_1 + attack_non_state_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

# summary(attack_state_based.h1.full)

# Placebo, full data
attack_state_based.h1.full.pla <- lm_robust(
  attack_state_based ~ rtb.placebo + rtb.other +
    attack_state_based_1 + attack_non_state_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

# summary(attack_state_based.h1.full.pla)

# Treatment, subset data
attack_state_based.h1.sub <- lm_robust(
  attack_state_based ~ rtb + rtb.other +
    attack_state_based_1 + attack_non_state_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(attack_state_based.h1.sub)

# Placebo, subset data
attack_state_based.h1.sub.pla <- lm_robust(
  attack_state_based ~ rtb.placebo + rtb.other +
    attack_state_based_1 + attack_non_state_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(attack_state_based.h1.sub.pla)


# Generate predicted probs
attack_state_based.h1.full.dplot <- tidy(attack_state_based.h1.full)[attack_state_based.h1.full$term == "rtb",] 
attack_state_based.h1.full.dplot$Group <- "Actual presence" #label the type of estimate

attack_state_based.h1.full.pla.dplot <- tidy(attack_state_based.h1.full.pla)[attack_state_based.h1.full.pla$term == 
                                                                               "rtb.placebo",] 
attack_state_based.h1.full.pla.dplot$Group <- "Placebo presence" #label the type of estimate

plot.attack_state_based.h1.full <- rbind(attack_state_based.h1.full.dplot, #combine for ggplot
                                         attack_state_based.h1.full.pla.dplot)

attack_state_based.h1.sub.dplot <- tidy(attack_state_based.h1.sub)[attack_state_based.h1.sub$term == "rtb",] 
attack_state_based.h1.sub.dplot$Group <- "Actual presence" #label the type of estimate

attack_state_based.h1.sub.pla.dplot <- tidy(attack_state_based.h1.sub.pla)[attack_state_based.h1.sub.pla$term == 
                                                                             "rtb.placebo",] 
attack_state_based.h1.sub.pla.dplot$Group <- "Placebo presence" #label the type of estimate

plot.attack_state_based.h1.sub <- rbind(attack_state_based.h1.sub.dplot, #combine for ggplot
                                        attack_state_based.h1.sub.pla.dplot)

@

<<MainNonStateAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Non-state violent events (H1): treatment and placebo fits plus plotting inputs.
## Every fit below shares one right-hand side; only the treatment indicator
## (rtb vs. rtb.placebo) and the data set differ. excluded_mean stays
## commented out of the specification.

## Treatment fit, full sample
attack_non_state.h1.full <- lm_robust(
  attack_non_state ~ rtb + rtb.other +
    attack_non_state_1 + attack_state_based_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack_non_state.h1.full)

## Placebo fit, full sample
attack_non_state.h1.full.pla <- lm_robust(
  attack_non_state ~ rtb.placebo + rtb.other +
    attack_non_state_1 + attack_state_based_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(attack_non_state.h1.full.pla)

## Full sample: keep only the treatment coefficient row of each fit and label it
attack_non_state.h1.full.dplot <-
  tidy(attack_non_state.h1.full)[attack_non_state.h1.full$term %in% "rtb", ]
attack_non_state.h1.full.dplot$Group <- "Actual presence"

attack_non_state.h1.full.pla.dplot <-
  tidy(attack_non_state.h1.full.pla)[attack_non_state.h1.full.pla$term %in% "rtb.placebo", ]
attack_non_state.h1.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_non_state.h1.full <- rbind(
  attack_non_state.h1.full.dplot,
  attack_non_state.h1.full.pla.dplot
)

## Treatment fit, subset data
attack_non_state.h1.sub <- lm_robust(
  attack_non_state ~ rtb + rtb.other +
    attack_non_state_1 + attack_state_based_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack_non_state.h1.sub)

## Placebo fit, subset data
attack_non_state.h1.sub.pla <- lm_robust(
  attack_non_state ~ rtb.placebo + rtb.other +
    attack_non_state_1 + attack_state_based_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(attack_non_state.h1.sub.pla)

## Subset data: keep only the treatment coefficient row of each fit and label it
attack_non_state.h1.sub.dplot <-
  tidy(attack_non_state.h1.sub)[attack_non_state.h1.sub$term %in% "rtb", ]
attack_non_state.h1.sub.dplot$Group <- "Actual presence"

attack_non_state.h1.sub.pla.dplot <-
  tidy(attack_non_state.h1.sub.pla)[attack_non_state.h1.sub.pla$term %in% "rtb.placebo", ]
attack_non_state.h1.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_non_state.h1.sub <- rbind(
  attack_non_state.h1.sub.dplot,
  attack_non_state.h1.sub.pla.dplot
)

@

<<MainOneSidedAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## One-sided violent events (H1): treatment and placebo fits plus plotting inputs.
## Every fit below shares one right-hand side; only the treatment indicator
## (rtb vs. rtb.placebo) and the data set differ. excluded_mean stays
## commented out of the specification.

## Treatment fit, full sample
attack_one_sided.h1.full <- lm_robust(
  attack_one_sided ~ rtb + rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack_one_sided.h1.full)

## Placebo fit, full sample
attack_one_sided.h1.full.pla <- lm_robust(
  attack_one_sided ~ rtb.placebo + rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(attack_one_sided.h1.full.pla)

## Full sample: keep only the treatment coefficient row of each fit and label it
attack_one_sided.h1.full.dplot <-
  tidy(attack_one_sided.h1.full)[attack_one_sided.h1.full$term %in% "rtb", ]
attack_one_sided.h1.full.dplot$Group <- "Actual presence"

attack_one_sided.h1.full.pla.dplot <-
  tidy(attack_one_sided.h1.full.pla)[attack_one_sided.h1.full.pla$term %in% "rtb.placebo", ]
attack_one_sided.h1.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_one_sided.h1.full <- rbind(
  attack_one_sided.h1.full.dplot,
  attack_one_sided.h1.full.pla.dplot
)

## Treatment fit, subset data
attack_one_sided.h1.sub <- lm_robust(
  attack_one_sided ~ rtb + rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack_one_sided.h1.sub)

## Placebo fit, subset data
attack_one_sided.h1.sub.pla <- lm_robust(
  attack_one_sided ~ rtb.placebo + rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(attack_one_sided.h1.sub.pla)

## Subset data: keep only the treatment coefficient row of each fit and label it
attack_one_sided.h1.sub.dplot <-
  tidy(attack_one_sided.h1.sub)[attack_one_sided.h1.sub$term %in% "rtb", ]
attack_one_sided.h1.sub.dplot$Group <- "Actual presence"

attack_one_sided.h1.sub.pla.dplot <-
  tidy(attack_one_sided.h1.sub.pla)[attack_one_sided.h1.sub.pla$term %in% "rtb.placebo", ]
attack_one_sided.h1.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_one_sided.h1.sub <- rbind(
  attack_one_sided.h1.sub.dplot,
  attack_one_sided.h1.sub.pla.dplot
)

@

<<MainStateBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## State-based battle deaths, modelled as log(count + 1) (H1):
## treatment and placebo fits plus plotting inputs.
## Every fit below shares one right-hand side; only the treatment indicator
## (rtb vs. rtb.placebo) and the data set differ. excluded_mean stays
## commented out of the specification.

## Treatment fit, full sample
best_state_based.h1.full <- lm_robust(
  log(best_state_based + 1) ~ rtb + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best_state_based.h1.full)

## Placebo fit, full sample
best_state_based.h1.full.pla <- lm_robust(
  log(best_state_based + 1) ~ rtb.placebo + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(best_state_based.h1.full.pla)

## Full sample: keep only the treatment coefficient row of each fit and label it
best_state_based.h1.full.dplot <-
  tidy(best_state_based.h1.full)[best_state_based.h1.full$term %in% "rtb", ]
best_state_based.h1.full.dplot$Group <- "Actual presence"

best_state_based.h1.full.pla.dplot <-
  tidy(best_state_based.h1.full.pla)[best_state_based.h1.full.pla$term %in% "rtb.placebo", ]
best_state_based.h1.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_state_based.h1.full <- rbind(
  best_state_based.h1.full.dplot,
  best_state_based.h1.full.pla.dplot
)

## Treatment fit, subset data
best_state_based.h1.sub <- lm_robust(
  log(best_state_based + 1) ~ rtb + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_state_based.h1.sub)

## Placebo fit, subset data
best_state_based.h1.sub.pla <- lm_robust(
  log(best_state_based + 1) ~ rtb.placebo + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_state_based.h1.sub.pla)

## Subset data: keep only the treatment coefficient row of each fit and label it
best_state_based.h1.sub.dplot <-
  tidy(best_state_based.h1.sub)[best_state_based.h1.sub$term %in% "rtb", ]
best_state_based.h1.sub.dplot$Group <- "Actual presence"

best_state_based.h1.sub.pla.dplot <-
  tidy(best_state_based.h1.sub.pla)[best_state_based.h1.sub.pla$term %in% "rtb.placebo", ]
best_state_based.h1.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_state_based.h1.sub <- rbind(
  best_state_based.h1.sub.dplot,
  best_state_based.h1.sub.pla.dplot
)

@

<<MainNonStateBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Non-state battle deaths, modelled as log(count + 1) (H1):
## treatment fit on the full sample with rtb as the treatment indicator.
## excluded_mean stays commented out of the specification.
best_non_state.h1.full <- lm_robust(
  log(best_non_state + 1) ~ rtb + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

#summary(best_non_state.h1.full)

## Placebo model
## Same specification as best_non_state.h1.full, with rtb replaced by
## rtb.placebo and fit on plagedrtb, so the two fits differ only in the
## treatment indicator and data set.
best_non_state.h1.full.pla <- lm_robust(log(best_non_state+1) ~ rtb.placebo + 
                       rtb.other + 
                       # Lagged state-based deaths: previously log(best_1+1), which
                       # did not match the treatment fit or the subset placebo fit.
                       log(best_state_based_1+1) +
                       log(best_non_state_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = plagedrtb)

#summary(best_non_state.h1.full.pla)


## Non-state battle deaths, modelled as log(count + 1) (H1):
## subset-data fits, followed by the plotting inputs for both samples.
## excluded_mean stays commented out of the specification.

## Treatment fit, subset data
best_non_state.h1.sub <- lm_robust(
  log(best_non_state + 1) ~ rtb + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_non_state.h1.sub)

## Placebo fit, subset data
best_non_state.h1.sub.pla <- lm_robust(
  log(best_non_state + 1) ~ rtb.placebo + rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_non_state.h1.sub.pla)

## Full sample: keep only the treatment coefficient row of each fit and label it
best_non_state.h1.full.dplot <-
  tidy(best_non_state.h1.full)[best_non_state.h1.full$term %in% "rtb", ]
best_non_state.h1.full.dplot$Group <- "Actual presence"

best_non_state.h1.full.pla.dplot <-
  tidy(best_non_state.h1.full.pla)[best_non_state.h1.full.pla$term %in% "rtb.placebo", ]
best_non_state.h1.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_non_state.h1.full <- rbind(
  best_non_state.h1.full.dplot,
  best_non_state.h1.full.pla.dplot
)

## Subset data: keep only the treatment coefficient row of each fit and label it
best_non_state.h1.sub.dplot <-
  tidy(best_non_state.h1.sub)[best_non_state.h1.sub$term %in% "rtb", ]
best_non_state.h1.sub.dplot$Group <- "Actual presence"

best_non_state.h1.sub.pla.dplot <-
  tidy(best_non_state.h1.sub.pla)[best_non_state.h1.sub.pla$term %in% "rtb.placebo", ]
best_non_state.h1.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_non_state.h1.sub <- rbind(
  best_non_state.h1.sub.dplot,
  best_non_state.h1.sub.pla.dplot
)

@

<<MainOneSidedBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## One-sided battle deaths, modelled as log(count + 1) (H1):
## treatment and placebo fits plus plotting inputs.
## Every fit below shares one right-hand side; only the treatment indicator
## (rtb vs. rtb.placebo) and the data set differ. excluded_mean stays
## commented out of the specification.

## Treatment fit, full sample
best_one_sided.h1.full <- lm_robust(
  log(best_one_sided + 1) ~ rtb + rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best_one_sided.h1.full)

## Placebo fit, full sample
best_one_sided.h1.full.pla <- lm_robust(
  log(best_one_sided + 1) ~ rtb.placebo + rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(best_one_sided.h1.full.pla)

## Full sample: keep only the treatment coefficient row of each fit and label it
best_one_sided.h1.full.dplot <-
  tidy(best_one_sided.h1.full)[best_one_sided.h1.full$term %in% "rtb", ]
best_one_sided.h1.full.dplot$Group <- "Actual presence"

best_one_sided.h1.full.pla.dplot <-
  tidy(best_one_sided.h1.full.pla)[best_one_sided.h1.full.pla$term %in% "rtb.placebo", ]
best_one_sided.h1.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_one_sided.h1.full <- rbind(
  best_one_sided.h1.full.dplot,
  best_one_sided.h1.full.pla.dplot
)

## Treatment fit, subset data
best_one_sided.h1.sub <- lm_robust(
  log(best_one_sided + 1) ~ rtb + rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_one_sided.h1.sub)

## Placebo fit, subset data
best_one_sided.h1.sub.pla <- lm_robust(
  log(best_one_sided + 1) ~ rtb.placebo + rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_one_sided.h1.sub.pla)

## Subset data: keep only the treatment coefficient row of each fit and label it
best_one_sided.h1.sub.dplot <-
  tidy(best_one_sided.h1.sub)[best_one_sided.h1.sub$term %in% "rtb", ]
best_one_sided.h1.sub.dplot$Group <- "Actual presence"

best_one_sided.h1.sub.pla.dplot <-
  tidy(best_one_sided.h1.sub.pla)[best_one_sided.h1.sub.pla$term %in% "rtb.placebo", ]
best_one_sided.h1.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.best_one_sided.h1.sub <- rbind(
  best_one_sided.h1.sub.dplot,
  best_one_sided.h1.sub.pla.dplot
)

@

% PRED PROBS FIGURE FOR H1: Additional Attack and Battle Death Types Sub Data
<<GEDtypes_ModelsH1_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 3.2, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on state, non-state, and one-sided conflict outcomes (black), compared to their respective placebo estimates, i.e. effect of future refugee presence (gray). All point estimates include 95$\\%$ CIs.")>>=

# plots
# Dodge width (0.5) shared by the points and error bars, so the actual and
# placebo estimates for the same outcome are drawn side by side.
pd <- position_dodge(0.5)

# Left panel: treatment-coefficient estimates for the three violent-event
# outcomes (subset-data fits), actual vs. placebo presence.
PLOT.attacktypes.h1.sub <- bind_rows(plot.attack_state_based.h1.sub,
                                     plot.attack_non_state.h1.sub,
                                     plot.attack_one_sided.h1.sub) %>%
  # Fix the x-axis order; the positional labels in scale_x_discrete() below
  # rely on exactly this level order.
  mutate(outcome = fct_relevel(outcome, 
                               "attack_state_based", 
                               "attack_non_state",
                               "attack_one_sided")) %>%
  ggplot(aes(x = outcome, y = estimate, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)), 
                    ymax = as.numeric(as.character(conf.high))), 
                width = 0, size = .7, position = pd) +
  # Constant reference line at zero (set as a parameter, not mapped per row).
  geom_hline(yintercept = 0) +
  ylim(-1.3,1.3) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels=c("State-based\nviolent events", 
                            "Non-state\nviolent events", 
                            "One-sided\nviolent events")) +
  scale_colour_manual(values=c("black", "gray60")) +
  # "none" is the supported way to drop a guide; FALSE is deprecated in ggplot2.
  guides(shape = "none") +
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        legend.title = element_blank(),
        legend.justification = c(1, 1), 
        legend.position= c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical", 
        legend.key = element_rect(colour = "transparent", fill = "white")
        ) 


# Right panel: treatment-coefficient estimates for the three logged
# battle-death outcomes (subset-data fits), actual vs. placebo presence.
# The legend is suppressed in this panel.
PLOT.besttypes.h1.sub <- bind_rows(plot.best_state_based.h1.sub,
                                   plot.best_non_state.h1.sub,
                                   plot.best_one_sided.h1.sub) %>%
  # Fix the x-axis order; the positional labels in scale_x_discrete() below
  # rely on exactly this level order.
  mutate(outcome = fct_relevel(outcome, 
                               "log(best_state_based + 1)", 
                               "log(best_non_state + 1)",
                               "log(best_one_sided + 1)")) %>%
  ggplot(aes(x = outcome, y = estimate, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)), 
                    ymax = as.numeric(as.character(conf.high))), 
                width = 0, size = .7, position = pd) +
  # Constant reference line at zero (set as a parameter, not mapped per row).
  geom_hline(yintercept = 0) +
  ylim(-.2,.2) +
  ylab("Change in Predicted\nNumber (logged)") +
  xlab("") +
  scale_x_discrete(labels=c("State-based\nbattle deaths", 
                            "Non-state\nbattle deaths", 
                            "One-sided\nbattle deaths")) +
  scale_colour_manual(values=c("black", "gray60")) +
  # "none" is the supported way to drop a guide; FALSE is deprecated in ggplot2.
  guides(shape = "none") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Put the two panels side by side and add the shared figure title.
# NOTE(review): hjust = 3.2 looks hand-tuned to position the title across
# both panels -- confirm after any change to fig.width or panel layout.
PLOT.attacktypes.h1.sub +
  PLOT.besttypes.h1.sub +
  ggtitle('Effect of Refugee Presence on Secondary Conflict Outcomes (H1)') +
  theme(plot.title = element_text(hjust = 3.2))

@

<<MainStateAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## State-based violent events (H2): treatment and placebo fits plus plotting inputs.
## Treatment fits include rtb, rtb.other and rtb_rtb.other; placebo fits use
## rtb.placebo, rtb.other and rtb.placebo_rtb.other. The remaining controls are
## identical across fits. excluded_mean stays commented out of the specification.

## Treatment fit, full sample
attack_state_based.h2.full <- lm_robust(
  attack_state_based ~ rtb + rtb.other + rtb_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack_state_based.h2.full)

## Placebo fit, full sample
attack_state_based.h2.full.pla <- lm_robust(
  attack_state_based ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(attack_state_based.h2.full.pla)

## Full sample: plotting rows from pred.lm.int.r() (helper defined outside
## this section), labelled by estimate type
attack_state_based.h2.full.dplot <- pred.lm.int.r(attack_state_based.h2.full)
attack_state_based.h2.full.dplot$Group <- "Actual presence"

attack_state_based.h2.full.pla.dplot <- pred.lm.int.r(attack_state_based.h2.full.pla)
attack_state_based.h2.full.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_state_based.h2.full <- rbind(
  attack_state_based.h2.full.dplot,
  attack_state_based.h2.full.pla.dplot
)

## Treatment fit, subset data
attack_state_based.h2.sub <- lm_robust(
  attack_state_based ~ rtb + rtb.other + rtb_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack_state_based.h2.sub)

## Placebo fit, subset data
attack_state_based.h2.sub.pla <- lm_robust(
  attack_state_based ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    #excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(attack_state_based.h2.sub.pla)

## Subset data: plotting rows from pred.lm.int.r(), labelled by estimate type
attack_state_based.h2.sub.dplot <- pred.lm.int.r(attack_state_based.h2.sub)
attack_state_based.h2.sub.dplot$Group <- "Actual presence"

attack_state_based.h2.sub.pla.dplot <- pred.lm.int.r(attack_state_based.h2.sub.pla)
attack_state_based.h2.sub.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows for ggplot
plot.attack_state_based.h2.sub <- rbind(
  attack_state_based.h2.sub.dplot,
  attack_state_based.h2.sub.pla.dplot
)

@

<<MainNonStateAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the outcome attack_non_state.
## Every specification is fit twice with lm_robust(): once with the actual
## presence term (rtb, rtb_rtb.other) and once with the placebo term
## (rtb.placebo, rtb.placebo_rtb.other). The remaining right-hand-side terms are
## the same in all four fits; only the presence term and the data frame change.
## excluded_mean stays out of the formulas (it was commented out in every
## specification).
## NOTE(review): rtb_rtb.other / rtb.placebo_rtb.other are used as ordinary
## columns here; presumably they hold the rtb x rtb.other product -- confirm
## where they are built.

## ---------------- Full data ----------------

# Actual presence, fit on geddata
attack_non_state.h2.full <- lm_robust(
  attack_non_state ~ rtb + rtb.other + rtb_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack_non_state.h2.full)

# Placebo presence, fit on plagedrtb
attack_non_state.h2.full.pla <- lm_robust(
  attack_non_state ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(attack_non_state.h2.full.pla)

# Plot-ready estimates from pred.lm.int.r(); Group records which presence term
# produced each table so the two can be told apart once stacked.
attack_non_state.h2.full.dplot <- pred.lm.int.r(attack_non_state.h2.full)
attack_non_state.h2.full.dplot[["Group"]] <- "Actual presence"

attack_non_state.h2.full.pla.dplot <- pred.lm.int.r(attack_non_state.h2.full.pla)
attack_non_state.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.attack_non_state.h2.full <- rbind(attack_non_state.h2.full.dplot,
                                       attack_non_state.h2.full.pla.dplot)

## ---------------- Subset data ----------------

# Actual presence, fit on geddatasub
attack_non_state.h2.sub <- lm_robust(
  attack_non_state ~ rtb + rtb.other + rtb_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack_non_state.h2.sub)

# Placebo presence, fit on plagedrtbsub
attack_non_state.h2.sub.pla <- lm_robust(
  attack_non_state ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_state_based_1 + attack_non_state_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(attack_non_state.h2.sub.pla)

attack_non_state.h2.sub.dplot <- pred.lm.int.r(attack_non_state.h2.sub)
attack_non_state.h2.sub.dplot[["Group"]] <- "Actual presence"

attack_non_state.h2.sub.pla.dplot <- pred.lm.int.r(attack_non_state.h2.sub.pla)
attack_non_state.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.attack_non_state.h2.sub <- rbind(attack_non_state.h2.sub.dplot,
                                      attack_non_state.h2.sub.pla.dplot)

@

<<MainOneSidedAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the outcome attack_one_sided.
## Every specification is fit twice with lm_robust(): once with the actual
## presence term (rtb, rtb_rtb.other) and once with the placebo term
## (rtb.placebo, rtb.placebo_rtb.other). The only lagged outcome on the
## right-hand side is attack_one_sided_1; the remaining terms are the same in
## all four fits. excluded_mean stays out of the formulas (it was commented out
## in every specification).

## ---------------- Full data ----------------

# Actual presence, fit on geddata
attack_one_sided.h2.full <- lm_robust(
  attack_one_sided ~ rtb + rtb.other + rtb_rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack_one_sided.h2.full)

# Placebo presence, fit on plagedrtb
attack_one_sided.h2.full.pla <- lm_robust(
  attack_one_sided ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(attack_one_sided.h2.full.pla)

# Plot-ready estimates from pred.lm.int.r(); Group records which presence term
# produced each table so the two can be told apart once stacked.
attack_one_sided.h2.full.dplot <- pred.lm.int.r(attack_one_sided.h2.full)
attack_one_sided.h2.full.dplot[["Group"]] <- "Actual presence"

attack_one_sided.h2.full.pla.dplot <- pred.lm.int.r(attack_one_sided.h2.full.pla)
attack_one_sided.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.attack_one_sided.h2.full <- rbind(attack_one_sided.h2.full.dplot,
                                       attack_one_sided.h2.full.pla.dplot)

## ---------------- Subset data ----------------

# Actual presence, fit on geddatasub
attack_one_sided.h2.sub <- lm_robust(
  attack_one_sided ~ rtb + rtb.other + rtb_rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack_one_sided.h2.sub)

# Placebo presence, fit on plagedrtbsub
attack_one_sided.h2.sub.pla <- lm_robust(
  attack_one_sided ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_one_sided_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(attack_one_sided.h2.sub.pla)

attack_one_sided.h2.sub.dplot <- pred.lm.int.r(attack_one_sided.h2.sub)
attack_one_sided.h2.sub.dplot[["Group"]] <- "Actual presence"

attack_one_sided.h2.sub.pla.dplot <- pred.lm.int.r(attack_one_sided.h2.sub.pla)
attack_one_sided.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.attack_one_sided.h2.sub <- rbind(attack_one_sided.h2.sub.dplot,
                                      attack_one_sided.h2.sub.pla.dplot)

@

<<MainStateBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the outcome log(best_state_based + 1).
## Every specification is fit twice with lm_robust(): once with the actual
## presence term (rtb, rtb_rtb.other) and once with the placebo term
## (rtb.placebo, rtb.placebo_rtb.other). Lagged state-based and non-state
## counts enter as log(x + 1); the remaining terms are the same in all four
## fits. excluded_mean stays out of the formulas (it was commented out in every
## specification).

## ---------------- Full data ----------------

# Actual presence, fit on geddata
best_state_based.h2.full <- lm_robust(
  log(best_state_based + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best_state_based.h2.full)

# Placebo presence, fit on plagedrtb
best_state_based.h2.full.pla <- lm_robust(
  log(best_state_based + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(best_state_based.h2.full.pla)

# Plot-ready estimates from pred.lm.int.r(); Group records which presence term
# produced each table so the two can be told apart once stacked.
best_state_based.h2.full.dplot <- pred.lm.int.r(best_state_based.h2.full)
best_state_based.h2.full.dplot[["Group"]] <- "Actual presence"

best_state_based.h2.full.pla.dplot <- pred.lm.int.r(best_state_based.h2.full.pla)
best_state_based.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_state_based.h2.full <- rbind(best_state_based.h2.full.dplot,
                                       best_state_based.h2.full.pla.dplot)

## ---------------- Subset data ----------------

# Actual presence, fit on geddatasub
best_state_based.h2.sub <- lm_robust(
  log(best_state_based + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_state_based.h2.sub)

# Placebo presence, fit on plagedrtbsub
best_state_based.h2.sub.pla <- lm_robust(
  log(best_state_based + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_state_based.h2.sub.pla)

best_state_based.h2.sub.dplot <- pred.lm.int.r(best_state_based.h2.sub)
best_state_based.h2.sub.dplot[["Group"]] <- "Actual presence"

best_state_based.h2.sub.pla.dplot <- pred.lm.int.r(best_state_based.h2.sub.pla)
best_state_based.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_state_based.h2.sub <- rbind(best_state_based.h2.sub.dplot,
                                      best_state_based.h2.sub.pla.dplot)

@

<<MainNonStateBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the outcome log(best_non_state + 1).
## Every specification is fit twice with lm_robust(): once with the actual
## presence term (rtb, rtb_rtb.other) and once with the placebo term
## (rtb.placebo, rtb.placebo_rtb.other). Lagged state-based and non-state
## counts enter as log(x + 1); the remaining terms are the same in all four
## fits. excluded_mean stays out of the formulas (it was commented out in every
## specification).

## ---------------- Full data ----------------

# Actual presence, fit on geddata
best_non_state.h2.full <- lm_robust(
  log(best_non_state + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best_non_state.h2.full)

# Placebo presence, fit on plagedrtb
best_non_state.h2.full.pla <- lm_robust(
  log(best_non_state + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(best_non_state.h2.full.pla)

# Plot-ready estimates from pred.lm.int.r(); Group records which presence term
# produced each table so the two can be told apart once stacked.
best_non_state.h2.full.dplot <- pred.lm.int.r(best_non_state.h2.full)
best_non_state.h2.full.dplot[["Group"]] <- "Actual presence"

best_non_state.h2.full.pla.dplot <- pred.lm.int.r(best_non_state.h2.full.pla)
best_non_state.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_non_state.h2.full <- rbind(best_non_state.h2.full.dplot,
                                     best_non_state.h2.full.pla.dplot)

## ---------------- Subset data ----------------

# Actual presence, fit on geddatasub
best_non_state.h2.sub <- lm_robust(
  log(best_non_state + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_non_state.h2.sub)

# Placebo presence, fit on plagedrtbsub
best_non_state.h2.sub.pla <- lm_robust(
  log(best_non_state + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_state_based_1 + 1) + log(best_non_state_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_non_state.h2.sub.pla)

best_non_state.h2.sub.dplot <- pred.lm.int.r(best_non_state.h2.sub)
best_non_state.h2.sub.dplot[["Group"]] <- "Actual presence"

best_non_state.h2.sub.pla.dplot <- pred.lm.int.r(best_non_state.h2.sub.pla)
best_non_state.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_non_state.h2.sub <- rbind(best_non_state.h2.sub.dplot,
                                    best_non_state.h2.sub.pla.dplot)

@

<<MainOneSidedBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for the outcome log(best_one_sided + 1).
## Every specification is fit twice with lm_robust(): once with the actual
## presence term (rtb, rtb_rtb.other) and once with the placebo term
## (rtb.placebo, rtb.placebo_rtb.other). The only lagged outcome on the
## right-hand side is log(best_one_sided_1 + 1); the remaining terms are the
## same in all four fits. excluded_mean stays out of the formulas (it was
## commented out in every specification).

## ---------------- Full data ----------------

# Actual presence, fit on geddata
best_one_sided.h2.full <- lm_robust(
  log(best_one_sided + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best_one_sided.h2.full)

# Placebo presence, fit on plagedrtb
best_one_sided.h2.full.pla <- lm_robust(
  log(best_one_sided + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)
# summary(best_one_sided.h2.full.pla)

# Plot-ready estimates from pred.lm.int.r(); Group records which presence term
# produced each table so the two can be told apart once stacked.
best_one_sided.h2.full.dplot <- pred.lm.int.r(best_one_sided.h2.full)
best_one_sided.h2.full.dplot[["Group"]] <- "Actual presence"

best_one_sided.h2.full.pla.dplot <- pred.lm.int.r(best_one_sided.h2.full.pla)
best_one_sided.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_one_sided.h2.full <- rbind(best_one_sided.h2.full.dplot,
                                     best_one_sided.h2.full.pla.dplot)

## ---------------- Subset data ----------------

# Actual presence, fit on geddatasub
best_one_sided.h2.sub <- lm_robust(
  log(best_one_sided + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best_one_sided.h2.sub)

# Placebo presence, fit on plagedrtbsub
best_one_sided.h2.sub.pla <- lm_robust(
  log(best_one_sided + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_one_sided_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)
# summary(best_one_sided.h2.sub.pla)

best_one_sided.h2.sub.dplot <- pred.lm.int.r(best_one_sided.h2.sub)
best_one_sided.h2.sub.dplot[["Group"]] <- "Actual presence"

best_one_sided.h2.sub.pla.dplot <- pred.lm.int.r(best_one_sided.h2.sub.pla)
best_one_sided.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Actual rows first, placebo rows second
plot.best_one_sided.h2.sub <- rbind(best_one_sided.h2.sub.dplot,
                                    best_one_sided.h2.sub.pla.dplot)

@

% PRED PROBS FIGURE FOR H2: Additional Attack and Battle Death Types Sub Data
<<GEDtypes_ModelsH2_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on state, non-state, and one-sided conflict outcomes (black) compared to their respective placebo estimates, i.e. effect of future refugee presence (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

## Four-panel figure built from the subset-data H2 estimate tables:
##   top row    (H2a): rows 1 and 3 of each stacked table
##   bottom row (H2b): rows 2 and 4 of each stacked table
##   left column : violent-event outcomes (attack_*)
##   right column: logged battle-death outcomes (log(best_* + 1))
## NOTE(review): the row indices assume each plot.*.h2.sub table stacks two
## "Actual presence" rows on top of two "Placebo presence" rows, so c(1,3) and
## c(2,4) each pick one actual and one placebo estimate -- confirm against
## pred.lm.int.r().

## Layers and settings common to all four panels: dodged point estimates, CI
## bars, a zero reference line, no x-axis title, black/gray60 group colours.
## `pd` is not defined in this chunk; presumably a position_dodge() object
## created earlier in the file.
h2.types.layers <- list(
  geom_point(position = pd, size = 3),
  # CIL/CIU are passed through as.character() before as.numeric(), which also
  # handles the case where they arrive as factors or strings
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd),
  geom_hline(aes(yintercept = 0)),
  xlab(""),
  scale_colour_manual(values = c("black", "gray60")),
  # "none" replaces the deprecated guides(shape = FALSE)
  guides(shape = "none")
)

## Axis settings for the violent-event panels
h2.attack.axes <- list(
  ylim(-2, 2),
  ylab("Change in Predicted Number"),
  scale_x_discrete(labels = c("State-based\nviolent events",
                              "Non-state\nviolent events",
                              "One-sided\nviolent events"))
)

## Axis settings for the battle-death panels
h2.best.axes <- list(
  ylim(-.5, .5),
  ylab("Change in Predicted\nNumber (logged)"),
  scale_x_discrete(labels = c("State-based\nbattle deaths",
                              "Non-state\nbattle deaths",
                              "One-sided\nbattle deaths"))
)

## Panel frame shared by all panels
h2.types.theme <- theme(panel.background = element_blank(),
                        panel.border = element_rect(colour = "gray", fill = NA, size = .8))

## Only the top-left panel carries the legend (inside, top-right corner);
## the other three panels suppress it.
h2.legend.inside <- theme(legend.title = element_blank(),
                          legend.justification = c(1, 1),
                          legend.position = c(.99, .99),
                          legend.box = "horizontal",
                          legend.direction = "vertical",
                          legend.key = element_rect(colour = "transparent", fill = "white"))
h2.legend.off <- theme(legend.position = "none")

## Top-left: violent events, H2a rows
PLOT.attacktypes.h2a.sub <- bind_rows(plot.attack_state_based.h2.sub[c(1,3),],
                                      plot.attack_non_state.h2.sub[c(1,3),],
                                      plot.attack_one_sided.h2.sub[c(1,3),]) %>%
  # fix the x-axis order: state-based, non-state, one-sided
  mutate(outcome = fct_relevel(outcome,
                               "attack_state_based",
                               "attack_non_state",
                               "attack_one_sided")) %>%
  ggplot(aes(x = outcome, y = Means, group = Group, colour = Group)) +
  h2.types.layers +
  h2.attack.axes +
  h2.types.theme +
  h2.legend.inside

## Bottom-left: violent events, H2b rows
PLOT.attacktypes.h2b.sub <- bind_rows(plot.attack_state_based.h2.sub[c(2,4),],
                                      plot.attack_non_state.h2.sub[c(2,4),],
                                      plot.attack_one_sided.h2.sub[c(2,4),]) %>%
  mutate(outcome = fct_relevel(outcome,
                               "attack_state_based",
                               "attack_non_state",
                               "attack_one_sided")) %>%
  ggplot(aes(x = outcome, y = Means, group = Group, colour = Group)) +
  h2.types.layers +
  h2.attack.axes +
  h2.types.theme +
  h2.legend.off

## Top-right: logged battle deaths, H2a rows
PLOT.besttypes.h2a.sub <- bind_rows(plot.best_state_based.h2.sub[c(1,3),],
                                    plot.best_non_state.h2.sub[c(1,3),],
                                    plot.best_one_sided.h2.sub[c(1,3),]) %>%
  # level strings must match the outcome labels stored in the estimate tables
  mutate(outcome = fct_relevel(outcome,
                               "log(best_state_based + 1)",
                               "log(best_non_state + 1)",
                               "log(best_one_sided + 1)")) %>%
  ggplot(aes(x = outcome, y = Means, group = Group, colour = Group)) +
  h2.types.layers +
  h2.best.axes +
  h2.types.theme +
  h2.legend.off

## Bottom-right: logged battle deaths, H2b rows
PLOT.besttypes.h2b.sub <- bind_rows(plot.best_state_based.h2.sub[c(2,4),],
                                    plot.best_non_state.h2.sub[c(2,4),],
                                    plot.best_one_sided.h2.sub[c(2,4),]) %>%
  mutate(outcome = fct_relevel(outcome,
                               "log(best_state_based + 1)",
                               "log(best_non_state + 1)",
                               "log(best_one_sided + 1)")) %>%
  ggplot(aes(x = outcome, y = Means, group = Group, colour = Group)) +
  h2.types.layers +
  h2.best.axes +
  h2.types.theme +
  h2.legend.off

## Assemble the rows. The title and theme are added after the two panels are
## combined; the hjust values above 1 are kept exactly as tuned.
## NOTE(review): presumably the large hjust pulls the title left so it sits
## over both panels -- confirm in the rendered figure.
patch1 <- (PLOT.attacktypes.h2a.sub + PLOT.besttypes.h2a.sub) +
  ggtitle("Effect of Concentrated Refugee Presence on Secondary Conflict Outcomes (H2a)") +
  theme(plot.title = element_text(hjust = 1.8))

patch2 <- (PLOT.attacktypes.h2b.sub + PLOT.besttypes.h2b.sub) +
  ggtitle("Effect of Dispersed Refugee Presence on Secondary Conflict Outcomes (H2b)") +
  theme(plot.title = element_text(hjust = 2))

## H2a row stacked above H2b row
patch1/patch2

@


%%%%% PANEL MATCH
\newpage
\section{Analysis using Panel Match}
\label{SIsec:panelmatch}

We use the matching method for causal identification with time-series cross-section data proposed by \citet{Imai:2018}. In this method, each treated observation is matched with a set of control observations that share an identical treatment history up to a pre-specified number of periods. Next, this set of matched controls is further refined by adjusting for covariates. Finally, we simply calculate the difference-in-differences estimator in order to account for an underlying time trend. To do this, we use the R package \texttt{PanelMatch} \citep{Imai:2018package}. Specifically, we create matched sets with 3 years (default) of treatment history, e.g., comparing ``treated provinces'' with refugee presence histories of [0 0 1] observations to all ``control provinces'' of [0 0 0] observations, without refinement. Results are substantively similar when refining on Mahalanobis distance or CBPS propensity score.

Similar to the results in the paper, the matching analyses show a null effect of refugee presence on conflict outcomes, both unconditionally and when conditioning on whether refugee presence is dispersed or concentrated. Note that these results are noisier than those in the main paper, since \texttt{PanelMatch} only analyzes those treated and control observations for which comparison units with identical treatment histories can be found. For example, in the first onset analysis, only 101 treated unit-year pairs have matches that can be used to estimate the ATT, and only 55 control unit-year pairs have matches that can be used to calculate the ATC. Refinement using pre-treatment covariates only reduces the size of these matched sets even further. Therefore, we urge caution when interpreting these results, due to the small subset of the data used to estimate causal effects.

% Effect of Refugee Presence on Conflict Outcomes
<<panelmatch_fig2_refpresence, eval = TRUE, echo = FALSE, warning = FALSE, message = FALSE>>=
## PanelMatch estimates of the effect of refugee presence (rtb) on four
## outcomes -- onset.n, incidence, attack and log(best + 1) -- using the subset
## data. Each outcome gets a PanelMatch() matched-set object followed by a
## PanelEstimate() call with nboot bootstrap iterations.
## Chunk options mirror the other analysis chunks in this file so the code and
## any messages/warnings are not printed into the document.
## NOTE(review): no set.seed() is visible in this chunk, so the bootstrapped
## estimates vary between runs unless a seed is set earlier in the file.

## Data cleaning
geddatasub$admin_id <- as.integer(as.factor(geddatasub$GMI_ADMIN))  # integer unit id for unit.id
geddatasub$country_factor <- as.factor(geddatasub$Country)          # used in covs.formula
geddatasub$log_best1 <- log(geddatasub$best+1)                      # outcome of the battle-deaths model

## Set parameters
lag_num <- 3        # periods of treatment history used for matching
sizematch_num <- 3  # passed as size.match
nboot <- 100        # bootstrap iterations in PanelEstimate()
model <- "none"     # refinement.method: no refinement of the matched sets

## Onset and incidence are estimated on years before 2009 only; build that
## subset once (after the columns above have been added) and reuse it.
## NOTE(review): presumably onset.n / incidence are not available from 2009
## onward -- confirm the reason for the cutoff.
geddatasub.pre2009 <- geddatasub[geddatasub$year < 2009,]

## Onset model, with subset data
pm_onset <- PanelMatch(lag = lag_num, time.id = "year", unit.id = "admin_id", 
                       treatment = "rtb", refinement.method = model,
                       data = geddatasub.pre2009, 
                       match.missing = FALSE, listwise.delete = TRUE,
                       covs.formula = ~ rtb.other + 
                         onset.n_1 + 
                         attack_neighbors_sum +
                         log_pop_1 + 
                         gcp_ppp_1 + 
                         STD + 
                         SQKM_ADMIN + 
                         log_bdist2 + 
                         #log(bdist3) +
                         log_capdist + 
                         idpb + 
                         country_factor,
                       size.match = sizematch_num, qoi = "ate" , outcome.var = "onset.n",
                       lead = 0, forbid.treatment.reversal = FALSE,
                       use.diagonal.variance.matrix = TRUE)

pm_onset_results <- PanelEstimate(pm_onset, data = geddatasub.pre2009,
                                  number.iterations = nboot)

## Incidence model, with subset data
pm_incidence <- PanelMatch(lag = lag_num, time.id = "year", unit.id = "admin_id", 
                           treatment = "rtb", refinement.method = model,
                           data = geddatasub.pre2009, 
                           match.missing = FALSE, listwise.delete = TRUE,
                           covs.formula = ~ rtb.other + 
                             incidence_1 + 
                             attack_neighbors_sum +
                             log_pop_1 + 
                             gcp_ppp_1 + 
                             STD + 
                             SQKM_ADMIN + 
                             log_bdist2 + 
                             #log(bdist3) +
                             log_capdist + 
                             idpb + 
                             country_factor,
                           size.match = sizematch_num, qoi = "ate" , outcome.var = "incidence",
                           lead = 0, forbid.treatment.reversal = FALSE,
                           use.diagonal.variance.matrix = TRUE)

pm_incidence_results <- PanelEstimate(pm_incidence, data = geddatasub.pre2009,
                                      number.iterations = nboot)

## Attack, with subset data (all years)
pm_attack <- PanelMatch(lag = lag_num, time.id = "year", unit.id = "admin_id", 
                           treatment = "rtb", refinement.method = model,
                           data = geddatasub, 
                           match.missing = FALSE, listwise.delete = TRUE,
                           covs.formula = ~ rtb.other + 
                             attack_1 + 
                             attack_neighbors_sum +
                             log_pop_1 + 
                             gcp_ppp_1 + 
                             STD + 
                             SQKM_ADMIN + 
                             log_bdist2 + 
                             #log(bdist3) +
                             log_capdist + 
                             idpb + 
                             country_factor,
                           size.match = sizematch_num, qoi = "ate" , outcome.var = "attack",
                           lead = 0, forbid.treatment.reversal = FALSE,
                           use.diagonal.variance.matrix = TRUE)

pm_attack_results <- PanelEstimate(pm_attack, data = geddatasub,
                                   number.iterations = nboot)

## Battle deaths model, with subset data (all years); outcome is log(best + 1)
pm_best <- PanelMatch(lag = lag_num, time.id = "year", unit.id = "admin_id", 
                      treatment = "rtb", refinement.method = model,
                      data = geddatasub, 
                      match.missing = FALSE, listwise.delete = TRUE,
                      covs.formula = ~ rtb.other + 
                        I(log(best_1+1)) + 
                        attack_neighbors_sum +
                        log_pop_1 + 
                        gcp_ppp_1 + 
                        STD + 
                        SQKM_ADMIN + 
                        log_bdist2 + 
                        #log(bdist3) +
                        log_capdist + 
                        idpb + 
                        country_factor,
                      size.match = sizematch_num, qoi = "ate" , outcome.var = "log_best1",
                      lead = 0, forbid.treatment.reversal = FALSE,
                      use.diagonal.variance.matrix = TRUE)

pm_best_results <- PanelEstimate(pm_best, data = geddatasub,
                                 number.iterations = nboot)

@

<<panelmatch_fig2_refpresence_figure, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 3, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on conflict outcomes using PanelMatch. It corresponds to Figure \\ref{fig:MainModelsH1_sub} in the paper. All point estimates include 95$\\%$ CIs.")>>=

## Create plot df: one row per outcome, holding the PanelEstimate point
## estimate and the 2.5% / 97.5% quantiles of its bootstrapped estimates.
df_plot_fig2 <- data.frame(
  est = c(pm_onset_results$estimates, pm_incidence_results$estimates, pm_attack_results$estimates,
          pm_best_results$estimates)
)
cis <- rbind(t(apply(pm_onset_results$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE)),
             t(apply(pm_incidence_results$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE)),
             t(apply(pm_attack_results$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE)),
             t(apply(pm_best_results$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE))) %>%
  as.data.frame()
names(cis) <- c("lower", "upper")
df_plot_fig2 <- bind_cols(df_plot_fig2, cis) %>%
  mutate(name = c("Onset", "Incidence", "Violent Events", "Battle Deaths"),
         name = fct_relevel(name, "Onset", "Incidence", "Violent Events", "Battle Deaths"))

## Helper: one panel showing the point estimate, its CI bar and a zero
## reference line for the selected outcomes.
##   df        data frame with columns name, est, lower, upper
##   outcomes  values of `name` to keep in this panel
##   y_title   y-axis title
##   y_limits  length-2 numeric vector with the y-axis limits
##   percent   if TRUE, y-axis labels are formatted as whole percentages
## Nothing is mapped to colour or shape, so no legend is drawn; the legend is
## switched off explicitly instead of being positioned.
fig2_panel <- function(df, outcomes, y_title, y_limits, percent = FALSE) {
  y_scale <- if (percent) {
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = y_limits)
  } else {
    scale_y_continuous(limits = y_limits)
  }
  df %>%
    filter(name %in% outcomes) %>%
    ggplot(aes(name, est)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, size = .7) +
    geom_hline(aes(yintercept = 0)) +
    ylab(y_title) +
    xlab("") +
    y_scale +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8),
          legend.position = "none")
}

## Create plots: each outcome family gets its own panel because the
## y-axis units differ (probability, count, logged count).
on_inc <- fig2_panel(df_plot_fig2, c("Onset", "Incidence"),
                     "Change in Predicted Probability",
                     c(-.15, .15), percent = TRUE)
viol <- fig2_panel(df_plot_fig2, c("Violent Events"),
                   "Change in Predicted Number",
                   c(-5, 5))
best <- fig2_panel(df_plot_fig2, c("Battle Deaths"),
                   "Change in Predicted\nNumber (logged)",
                   c(-.5, .5))

## Combine the three panels side by side and print with a shared title
patch <- (on_inc + viol + best)
patch + plot_annotation(title = 'Effect of Refugee Presence on Conflict Outcomes using Panel Match (H1)') 

@

% Effect of Concentrated/Dispersed Refugee Presence on Conflict Outcomes
<<panelmatch_fig3_refpresence_concdisp>>=
## ------------------------------------------------------------------
## Matching + estimation, one model per outcome.
## Every PanelEstimate() call below is moderated by rtb.other, so each
## result is split into the elements X0 and X1 that are read further down.
## log(bdist3) is deliberately not part of any covariate formula.
## ------------------------------------------------------------------

## Onset (rows with year < 2009 only)
pm_onset_concdisp <- PanelMatch(
  data = geddatasub[geddatasub$year < 2009,],
  unit.id = "admin_id", time.id = "year",
  treatment = "rtb", outcome.var = "onset.n",
  lag = lag_num, lead = 0, qoi = "ate",
  refinement.method = model, size.match = sizematch_num,
  covs.formula = ~ onset.n_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb + country_factor,
  match.missing = FALSE, listwise.delete = TRUE,
  forbid.treatment.reversal = FALSE,
  use.diagonal.variance.matrix = TRUE
)
pm_onset_concdisp_results <- PanelEstimate(
  pm_onset_concdisp,
  data = geddatasub[geddatasub$year < 2009,],
  moderator = "rtb.other",
  number.iterations = nboot
)

## Incidence (rows with year < 2009 only)
pm_incidence_concdisp <- PanelMatch(
  data = geddatasub[geddatasub$year < 2009,],
  unit.id = "admin_id", time.id = "year",
  treatment = "rtb", outcome.var = "incidence",
  lag = lag_num, lead = 0, qoi = "ate",
  refinement.method = model, size.match = sizematch_num,
  covs.formula = ~ incidence_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb + country_factor,
  match.missing = FALSE, listwise.delete = TRUE,
  forbid.treatment.reversal = FALSE,
  use.diagonal.variance.matrix = TRUE
)
pm_incidence_concdisp_results <- PanelEstimate(
  pm_incidence_concdisp,
  data = geddatasub[geddatasub$year < 2009,],
  moderator = "rtb.other",
  number.iterations = nboot
)

## Violent events (all rows of geddatasub)
pm_attack_concdisp <- PanelMatch(
  data = geddatasub,
  unit.id = "admin_id", time.id = "year",
  treatment = "rtb", outcome.var = "attack",
  lag = lag_num, lead = 0, qoi = "ate",
  refinement.method = model, size.match = sizematch_num,
  covs.formula = ~ attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb + country_factor,
  match.missing = FALSE, listwise.delete = TRUE,
  forbid.treatment.reversal = FALSE,
  use.diagonal.variance.matrix = TRUE
)
pm_attack_concdisp_results <- PanelEstimate(
  pm_attack_concdisp,
  data = geddatasub,
  moderator = "rtb.other",
  number.iterations = nboot
)

## Battle deaths (all rows of geddatasub)
pm_best_concdisp <- PanelMatch(
  data = geddatasub,
  unit.id = "admin_id", time.id = "year",
  treatment = "rtb", outcome.var = "log_best1",
  lag = lag_num, lead = 0, qoi = "ate",
  refinement.method = model, size.match = sizematch_num,
  covs.formula = ~ I(log(best_1+1)) + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb + country_factor,
  match.missing = FALSE, listwise.delete = TRUE,
  forbid.treatment.reversal = FALSE,
  use.diagonal.variance.matrix = TRUE
)
pm_best_concdisp_results <- PanelEstimate(
  pm_best_concdisp,
  data = geddatasub,
  moderator = "rtb.other",
  number.iterations = nboot
)

## ------------------------------------------------------------------
## Plot data: one row per outcome with the point estimate and the
## 2.5% / 97.5% quantiles of the bootstrapped estimates.
## The order of fig3_results must match the order of fig3_outcomes.
## ------------------------------------------------------------------
fig3_results <- list(pm_onset_concdisp_results,
                     pm_incidence_concdisp_results,
                     pm_attack_concdisp_results,
                     pm_best_concdisp_results)
fig3_outcomes <- c("Onset", "Incidence", "Violent Events", "Battle Deaths")

## Concentrated: the X0 element of each moderated result
cis <- as.data.frame(do.call(rbind, lapply(fig3_results, function(res) {
  t(apply(res$X0$bootstrapped.estimates, 2, quantile,
          probs = c(.025, .975), na.rm = TRUE))
})))
names(cis) <- c("lower", "upper")
df_plot_fig3_conc <- data.frame(
  est = do.call(c, lapply(fig3_results, function(res) res$X0$estimates))
) %>%
  bind_cols(cis) %>%
  mutate(name = fig3_outcomes,
         name = fct_relevel(name, "Onset", "Incidence", "Violent Events", "Battle Deaths"),
         subset = "Concentrated")

## Dispersed: the X1 element of each moderated result
cis <- as.data.frame(do.call(rbind, lapply(fig3_results, function(res) {
  t(apply(res$X1$bootstrapped.estimates, 2, quantile,
          probs = c(.025, .975), na.rm = TRUE))
})))
names(cis) <- c("lower", "upper")
df_plot_fig3_disp <- data.frame(
  est = do.call(c, lapply(fig3_results, function(res) res$X1$estimates))
) %>%
  bind_cols(cis) %>%
  mutate(name = fig3_outcomes,
         name = fct_relevel(name, "Onset", "Incidence", "Violent Events", "Battle Deaths"),
         subset = "Dispersed")

@

<<panelmatch_fig3_heteffects_figure, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on conflict outcomes conditional on refugee presence in other provinces of the same country-year, using PanelMatch. It corresponds to Figure \\ref{fig:MainModelsH2_sub} in the paper. All point estimates include 95$\\%$ CIs.")>>=

## Helper: one panel showing the point estimate, its CI bar and a zero
## reference line for the selected outcomes.
##   df        data frame with columns name, est, lower, upper
##   outcomes  values of `name` to keep in this panel
##   y_title   y-axis title
##   y_limits  length-2 numeric vector with the y-axis limits
##   percent   if TRUE, y-axis labels are formatted as whole percentages
## Nothing is mapped to colour or shape, so no legend is drawn; the legend is
## switched off explicitly instead of being positioned.
fig3_panel <- function(df, outcomes, y_title, y_limits, percent = FALSE) {
  y_scale <- if (percent) {
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = y_limits)
  } else {
    scale_y_continuous(limits = y_limits)
  }
  df %>%
    filter(name %in% outcomes) %>%
    ggplot(aes(name, est)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, size = .7) +
    geom_hline(aes(yintercept = 0)) +
    ylab(y_title) +
    xlab("") +
    y_scale +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8),
          legend.position = "none")
}

## ---------------------------
## Create plots - concentrated
## ---------------------------
on_inc_conc <- fig3_panel(df_plot_fig3_conc, c("Onset", "Incidence"),
                          "Change in Predicted Probability",
                          c(-.3, .3), percent = TRUE)
viol_conc <- fig3_panel(df_plot_fig3_conc, c("Violent Events"),
                        "Change in Predicted Number",
                        c(-10, 10))
best_conc <- fig3_panel(df_plot_fig3_conc, c("Battle Deaths"),
                        "Change in Predicted\nNumber (logged)",
                        c(-1, 1))

## ------------------------
## Create plots - dispersed
## ------------------------
on_inc_disp <- fig3_panel(df_plot_fig3_disp, c("Onset", "Incidence"),
                          "Change in Predicted Probability",
                          c(-.3, .3), percent = TRUE)
viol_disp <- fig3_panel(df_plot_fig3_disp, c("Violent Events"),
                        "Change in Predicted Number",
                        c(-10, 10))
best_disp <- fig3_panel(df_plot_fig3_disp, c("Battle Deaths"),
                        "Change in Predicted\nNumber (logged)",
                        c(-1, 1))

#plot
## NOTE(review): ggtitle()/theme() added with `+` to a patchwork are applied
## to the last plot of the row (the battle deaths panel); the hjust values
## above 1 look hand-tuned to shift that title back over the whole row.
## Re-check the placement if fig.width or the title text changes.
patch1 <- (on_inc_conc + viol_conc + best_conc) + 
  ggtitle("Effect of Concentrated Refugee Presence using Panel Match (H2a)") +
  theme(plot.title = element_text(hjust= 2))

patch2 <- (on_inc_disp + viol_disp + best_disp) +
  ggtitle('Effect of Dispersed Refugee Presence using Panel Match (H2b)') +
  theme(plot.title = element_text(hjust= 2.2))

## Concentrated row on top, dispersed row below
patch1/patch2

@

% Effect of Concentrated/Dispersed Refugee Presence on Nighttime Lights
<<panelmatch_fig4_nightlights>>=

## Nighttime lights: matched sets for treatment "rtb" with outcome
## nlights_calib_mean. Covariates include the lagged outcome and the
## neighbour lights mean; log(bdist3) is deliberately left out.
pm_calib <- PanelMatch(
  data = geddatasub,
  unit.id = "admin_id", time.id = "year",
  treatment = "rtb", outcome.var = "nlights_calib_mean",
  lag = lag_num, lead = 0, qoi = "ate",
  refinement.method = model, size.match = sizematch_num,
  covs.formula = ~ nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb + country_factor,
  match.missing = FALSE, listwise.delete = TRUE,
  forbid.treatment.reversal = FALSE,
  use.diagonal.variance.matrix = TRUE
)

## Estimates moderated by rtb.other; the plotting chunk reads the X0 and X1
## elements of this result.
pm_calib_results <- PanelEstimate(
  pm_calib,
  data = geddatasub,
  moderator = "rtb.other",
  number.iterations = nboot
)

@

<<panelmatch_fig4_nightlights_plot, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 3, out.width= ".8\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence on average nighttime lights conditional on refugee presence in other provinces of the same country-year, using PanelMatch. It corresponds to Figure \\ref{fig:MainModelsH3} in the paper. All point estimates include 95$\\%$ CIs.")>>=

## Create plot df: one row per subset (X0 labelled "Concentrated", X1 labelled
## "Dispersed") with the point estimate and the 2.5% / 97.5% quantiles of the
## bootstrapped estimates.
df_plot_fig4 <- data.frame(
  est = c(pm_calib_results$X0$estimates, pm_calib_results$X1$estimates)
)
cis <- rbind(t(apply(pm_calib_results$X0$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE)),
             t(apply(pm_calib_results$X1$bootstrapped.estimates, 2, quantile, probs = c(.025, .975), na.rm = TRUE))) %>%
  as.data.frame()
names(cis) <- c("lower", "upper")
df_plot_fig4 <- bind_cols(df_plot_fig4, cis) %>%
  mutate(name = "Average Nighttime Lights",
         subset = c("Concentrated", "Dispersed"))

## Helper: one panel showing the point estimate, its CI bar and a zero
## reference line for a single subset.
##   df          data frame with columns name, est, lower, upper, subset
##   grp         value of `subset` to plot
##   plot_title  panel title
## Nothing is mapped to colour or shape, so no legend is drawn; the legend is
## switched off explicitly instead of being positioned.
fig4_panel <- function(df, grp, plot_title) {
  df %>%
    filter(subset == grp) %>%
    ggplot(aes(name, est)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0, size = .7) +
    geom_hline(aes(yintercept = 0)) +
    ylab("Change in Predicted Lights") +
    xlab("") +
    ggtitle(plot_title) +
    scale_y_continuous(limits = c(-.6, .6)) +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8),
          legend.position = "none")
}

## Create plots
nt_conc <- fig4_panel(df_plot_fig4, "Concentrated",
                      "Effect of Concentrated\nRefugee Presence (H3a)")
nt_disp <- fig4_panel(df_plot_fig4, "Dispersed",
                      "Effect of Dispersed\nRefugee Presence (H3b)")

#plot: both panels side by side
nt_conc + nt_disp

@


%%%%% HETEROGENEOUS EFFECTS
\newpage
\section{Additional Heterogeneous and Subgroup Effects}
\label{SIsec:hte}

% %%% NEW VS. OLDER
% \subsection{Heterogeneous Effects of New versus Older Refugee Sites}
% \label{SIsec:hte_newold}
% 

%%% LARGE VS. SMALL
\subsection{Heterogeneous Effects of Large versus Small Refugee Populations}
\label{SIsec:hte_largesmall}

As described in the Data section of our paper, for a subset of our data, we have information from the UNHCR on refugee population, specifically the total number of refugees per site, which we aggregate to the province-year level. We only have these data for African countries from 2010 to 2015. Thus, for the heterogeneous effects analysis based on population size, we can only examine the violent events and logged battle death outcomes, as only these span this time period. In this section, we create a binary variable for heterogeneous effects analysis based on the size of refugee presence in a province-year in order to test H4. We designate refugee-hosting provinces whose refugee population amounts to 5\% or more of the local population as ``large'' and hosting provinces under 5\% as ``small.'' 


%H1: Effect of Large vs. Small Refugee Population on Conflict 
<<HetAttack_SiteSizeH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model with population size, with Africa subset data
## Regresses attack on the large/small refugee-population indicators plus
## controls, with Country and year entered as factors (lm_robust,
## data = gedpopafrica).
attack.het.sitesize.sub <- lm_robust(attack ~ large_ref_population +
                       small_ref_population + 
                       rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(attack.het.sitesize.sub)

## Placebo model
## Same controls, but the regressor of interest is rtb.placebo and the data
## set is plagedpopafrica.
attack.het.sitesize.sub.pla <- lm_robust(attack ~ rtb.placebo + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedpopafrica)

#summary(attack.het.sitesize.sub.pla)

# Build the plotting rows for the actual-presence estimates
# NOTE(review): pred.lm.cat.r() is defined outside this chunk; its output
# columns are selected by position below -- confirm they line up with the
# tidy() columns picked for the placebo row.
attack.het.sitesize.sub.dplot <- pred.lm.cat.r(attack.het.sitesize.sub) #output of the pred.lm.cat.r helper
attack.het.sitesize.sub.dplot$Group <- "Actual presence" #label the type of estimate

# Keep only the rtb.placebo row of the tidied placebo model. The row filter
# reads $term from the fitted model (the right-hand side is evaluated before
# the assignment); from here on attack.het.sitesize.sub.pla holds that single
# tidy row, not the model object.
attack.het.sitesize.sub.pla <- tidy(attack.het.sitesize.sub.pla)[attack.het.sitesize.sub.pla$term == "rtb.placebo",] 
attack.het.sitesize.sub.pla$Group <- "Placebo presence"

# Stack actual and placebo rows; the positional column picks must yield the
# same column names on both sides for rbind() to succeed.
plot.attack.het.sitesize.sub <- rbind(attack.het.sitesize.sub.dplot[,c(1,2,3,4,5,7,8)], #combine for ggplot
                                      attack.het.sitesize.sub.pla[,c(1,2,3,6,7,9,10)])

@

<<HetBattleDeath_SiteSizeH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model with population size, with Africa subset data
## Regresses log(best+1) on the large/small refugee-population indicators plus
## controls (including the lagged outcome log(best_1+1)), with Country and
## year entered as factors (lm_robust, data = gedpopafrica).
best.het.sitesize.sub <- lm_robust(log(best+1) ~ large_ref_population +
                       small_ref_population + 
                       rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(best.het.sitesize.sub)


## Placebo model
## Same controls, but the regressor of interest is rtb.placebo and the data
## set is plagedpopafrica.
best.het.sitesize.sub.pla <- lm_robust(log(best+1) ~ rtb.placebo + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum + 
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = plagedpopafrica)

#summary(best.het.sitesize.sub.pla)

# Build the plotting rows for the actual-presence estimates
# NOTE(review): pred.lm.cat.r() is defined outside this chunk; its output
# columns are selected by position below -- confirm they line up with the
# tidy() columns picked for the placebo row.
best.het.sitesize.sub.dplot <- pred.lm.cat.r(best.het.sitesize.sub) #output of the pred.lm.cat.r helper
best.het.sitesize.sub.dplot$Group <- "Actual presence" #label the type of estimate

# Keep only the rtb.placebo row of the tidied placebo model; the row filter
# reads $term from the fitted model, which stays available here because the
# tidy row is stored under a separate name.
best.het.sitesize.sub.pla.dplot <- tidy(best.het.sitesize.sub.pla)[best.het.sitesize.sub.pla$term == "rtb.placebo",] 
best.het.sitesize.sub.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo rows; the positional column picks must yield the
# same column names on both sides for rbind() to succeed.
plot.best.het.sitesize.sub <- rbind(best.het.sitesize.sub.dplot[,c(1,2,3,4,5,7,8)], #combine for ggplot
                                      best.het.sitesize.sub.pla.dplot[,c(1,2,3,6,7,9,10)])

@

<<Het_SiteSize1_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 5.5, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence in provinces hosting relatively large versus small refugee populations (heterogeneous effects analysis) on violent events and battle deaths 2010--2015 in Africa (black), compared to their respective placebo estimates (gray). All point estimates include 95$\\%$ CIs.")>>=

# plots
pd <- position_dodge(0.5) # dodge width 0.5: offsets the actual and placebo estimates sideways

## Helper: actual vs. placebo estimate for one outcome, with CI bars and a
## zero reference line.
##   df             rows to plot (columns outcome, estimate, conf.low,
##                  conf.high, Group)
##   x_label        label shown on the x axis for the single outcome
##   y_title        y-axis title
##   y_range        length-2 numeric vector with the y-axis limits
##   legend_inside  if TRUE, draw the Group legend in the top-right corner of
##                  the panel; otherwise no legend is drawn
## Uses the dodge object `pd` defined above.
sitesize_panel <- function(df, x_label, y_title, y_range, legend_inside = FALSE) {
  legend_theme <- if (legend_inside) {
    theme(legend.title = element_blank(),
          legend.justification = c(1, 1),
          legend.position = c(.99, .99),
          legend.box = "horizontal",
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    theme(legend.position = "none")
  }
  ggplot(df, aes(x = outcome, y = estimate, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # conf.low / conf.high are coerced through character in case the stacked
    # data frame stores them as factor or character
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y_range[1], y_range[2]) +
    ylab(y_title) +
    xlab("") +
    scale_x_discrete(labels = x_label) +
    scale_colour_manual(values = c("black", "gray60")) +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)) +
    legend_theme
}

## Rows 1 and 2 of each plot data frame are used as the large and small
## population estimates, row 3 as the placebo estimate.
PLOT.attack.het.largepop.sub <- sitesize_panel(plot.attack.het.sitesize.sub[c(1,3),],
                                               c("Violent Events"),
                                               "Change in Predicted Number",
                                               c(-10, 10), legend_inside = TRUE)

PLOT.attack.het.smallpop.sub <- sitesize_panel(plot.attack.het.sitesize.sub[c(2,3),],
                                               c("Violent Events"),
                                               "Change in Predicted Number",
                                               c(-10, 10))

PLOT.best.het.largepop.sub <- sitesize_panel(plot.best.het.sitesize.sub[c(1,3),],
                                             c("Battle Deaths"),
                                             "Change in Predicted Number (logged)",
                                             c(-.7, .7))

PLOT.best.het.smallpop.sub <- sitesize_panel(plot.best.het.sitesize.sub[c(2,3),],
                                             c("Battle Deaths"),
                                             "Change in Predicted Number (logged)",
                                             c(-.7, .7))

## The title styling is attached to the titled (violent events) panel itself:
## a theme() added with `+` to a patchwork only reaches the last plot added,
## which here is the untitled battle deaths panel.
title_style <- theme(plot.title = element_text(hjust= 0, size = 11))

patch1 <- (PLOT.attack.het.largepop.sub + ggtitle("Effect of Large Refugee Population (H1)") + title_style) + 
  PLOT.best.het.largepop.sub

patch2 <- (PLOT.attack.het.smallpop.sub + ggtitle("Effect of Small Refugee Population (H1)") + title_style) + 
  PLOT.best.het.smallpop.sub

## Large-population row on top, small-population row below
patch1/patch2

@

Figure~\ref{fig:Het_SiteSize1_sub} shows the heterogeneous effects for provinces hosting relatively large refugee populations (top row) versus relatively small ones (bottom row) on violent events and battle deaths (onset and incidence fall outside the time period of this subset data). There is no effect of relatively large populations on conflict. However, there is a positive effect of relatively small populations on conflict. This may be due to small populations not bringing developmental benefits and being easier targets for victimization. Future research should further interrogate this finding.

%H2: Effect of Concentrated/Dispersed Large vs. Small Refugee Population on Conflict 
<<HetAttack_SiteSizeH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2, outcome `attack`: three lm_robust() fits sharing one set of controls.
## Only the leading regressor (and its matching *_rtb.other term) and the data
## set differ between them. excluded_mean and log(bdist3) remain left out.

## Actual presence, large-population indicator (gedpopafrica)
attack.h2.het.largepop.sub <- lm_robust(
  attack ~ large_ref_population + rtb.other + large_ref_population_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(attack.h2.het.largepop.sub)

## Actual presence, small-population indicator (gedpopafrica)
attack.h2.het.smallpop.sub <- lm_robust(
  attack ~ small_ref_population + rtb.other + small_ref_population_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(attack.h2.het.smallpop.sub)

## Placebo presence (plagedpopafrica); shared by the large and small comparisons
attack.h2.het.sitesize.sub.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    attack_1 + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedpopafrica
)
# summary(attack.h2.het.sitesize.sub.pla)

# Convert each fit into a plotting table with pred.lm.int.r()
attack.h2.het.largepop.sub.dplot <- pred.lm.int.r(attack.h2.het.largepop.sub)
attack.h2.het.smallpop.sub.dplot <- pred.lm.int.r(attack.h2.het.smallpop.sub)
attack.h2.het.sitesize.sub.pla.dplot <- pred.lm.int.r(attack.h2.het.sitesize.sub.pla)

# Label the estimate type; the plots group and colour by this column
attack.h2.het.largepop.sub.dplot[["Group"]] <- "Actual presence"
attack.h2.het.smallpop.sub.dplot[["Group"]] <- "Actual presence"
attack.h2.het.sitesize.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Stack actual rows on top of placebo rows, one table per population size
plot.attack.h2.het.largepop.sub <- rbind(attack.h2.het.largepop.sub.dplot,
                                         attack.h2.het.sitesize.sub.pla.dplot)
plot.attack.h2.het.smallpop.sub <- rbind(attack.h2.het.smallpop.sub.dplot,
                                         attack.h2.het.sitesize.sub.pla.dplot)

@

<<HetBattleDeath_SiteSizeH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2, outcome log(best+1): three lm_robust() fits sharing one set of controls.
## Only the leading regressor (and its matching *_rtb.other term) and the data
## set differ between them. excluded_mean and log(bdist3) remain left out.

## Actual presence, large-population indicator (gedpopafrica)
best.h2.het.largepop.sub <- lm_robust(
  log(best+1) ~ large_ref_population + rtb.other + large_ref_population_rtb.other +
    log(best_1+1) + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(best.h2.het.largepop.sub)

## Actual presence, small-population indicator (gedpopafrica)
best.h2.het.smallpop.sub <- lm_robust(
  log(best+1) ~ small_ref_population + rtb.other + small_ref_population_rtb.other +
    log(best_1+1) + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(best.h2.het.smallpop.sub)

## Placebo presence (plagedpopafrica); shared by the large and small comparisons
best.h2.het.sitesize.sub.pla <- lm_robust(
  log(best+1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_1+1) + attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedpopafrica
)
# summary(best.h2.het.sitesize.sub.pla)

# Convert each fit into a plotting table with pred.lm.int.r()
best.h2.het.largepop.sub.dplot <- pred.lm.int.r(best.h2.het.largepop.sub)
best.h2.het.smallpop.sub.dplot <- pred.lm.int.r(best.h2.het.smallpop.sub)
best.h2.het.sitesize.sub.pla.dplot <- pred.lm.int.r(best.h2.het.sitesize.sub.pla)

# Label the estimate type; the plots group and colour by this column
best.h2.het.largepop.sub.dplot[["Group"]] <- "Actual presence"
best.h2.het.smallpop.sub.dplot[["Group"]] <- "Actual presence"
best.h2.het.sitesize.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Stack actual rows on top of placebo rows, one table per population size
plot.best.h2.het.largepop.sub <- rbind(best.h2.het.largepop.sub.dplot,
                                       best.h2.het.sitesize.sub.pla.dplot)
plot.best.h2.het.smallpop.sub <- rbind(best.h2.het.smallpop.sub.dplot,
                                       best.h2.het.sitesize.sub.pla.dplot)

@

<<Het_SizeSizeH2_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 11, out.width= ".95\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect  of refugee presence in provinces hosting relatively large versus small refugee populations (heterogeneous effects analysis) on violent events and battle deaths 2010--2015 in Africa (black) compared to their respective placebo estimates (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# Coefficient plots for H2 by relative population size: violent events and
# logged battle deaths, actual versus placebo presence.
pd <- position_dodge(0.5) # dodge width of 0.5 so the two Group estimates sit side by side

# Draw one panel (point estimate plus CI bar per Group) from two rows of a
# stacked actual/placebo table.
#   est          data frame with columns outcome, Means, CIL, CIU and Group
#   rows         row indices of `est` to plot
#   y.range      length-2 numeric vector handed to ylim()
#   y.title      y-axis title
#   x.tick       label for the single x-axis tick
#   with.legend  TRUE draws the legend inside the top-right corner of the panel;
#                FALSE (default) suppresses it
# Returns the ggplot object.
h2.het.panel <- function(est, rows, y.range, y.title, x.tick, with.legend = FALSE) {
  panel <- ggplot(est[rows, ],
                  aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU are passed through as.character() first, as in the original code,
    # so that factor-coded bounds are converted by label rather than by level index
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.range[1], y.range[2]) +
    ylab(y.title) +
    xlab("") +
    scale_x_discrete(labels = c(x.tick)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated FALSE
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8))

  if (with.legend) {
    panel + theme(legend.title = element_blank(),
                  legend.justification = c(1, 1),
                  legend.position = c(.99, .99),
                  legend.box = "horizontal",
                  legend.direction = "vertical",
                  legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    panel + theme(legend.position = "none")
  }
}

# Rows c(1,3) feed the h2a (concentrated) panels and rows c(2,4) the h2b
# (dispersed) panels. This assumes each stacked table holds two actual rows
# followed by two placebo rows -- TODO confirm against pred.lm.int.r().
attack.ylim <- c(-10, 10)
attack.ylab <- "Change in Predicted Number"
best.ylim <- c(-1, 2)
best.ylab <- "Change in Predicted Number (logged)"

# LARGE POP: h2a is concentrated, h2b is dispersed
# Only the first panel carries the legend.
PLOT.attack.h2a.het.largepop.sub <- h2.het.panel(plot.attack.h2.het.largepop.sub, c(1, 3),
                                                 attack.ylim, attack.ylab, "Violent Events",
                                                 with.legend = TRUE)
PLOT.attack.h2b.het.largepop.sub <- h2.het.panel(plot.attack.h2.het.largepop.sub, c(2, 4),
                                                 attack.ylim, attack.ylab, "Violent Events")
PLOT.best.h2a.het.largepop.sub <- h2.het.panel(plot.best.h2.het.largepop.sub, c(1, 3),
                                               best.ylim, best.ylab, "Battle Deaths")
PLOT.best.h2b.het.largepop.sub <- h2.het.panel(plot.best.h2.het.largepop.sub, c(2, 4),
                                               best.ylim, best.ylab, "Battle Deaths")

# SMALL POP: h2a is concentrated, h2b is dispersed
PLOT.attack.h2a.het.smallpop.sub <- h2.het.panel(plot.attack.h2.het.smallpop.sub, c(1, 3),
                                                 attack.ylim, attack.ylab, "Violent Events")
PLOT.attack.h2b.het.smallpop.sub <- h2.het.panel(plot.attack.h2.het.smallpop.sub, c(2, 4),
                                                 attack.ylim, attack.ylab, "Violent Events")
PLOT.best.h2a.het.smallpop.sub <- h2.het.panel(plot.best.h2.het.smallpop.sub, c(1, 3),
                                               best.ylim, best.ylab, "Battle Deaths")
PLOT.best.h2b.het.smallpop.sub <- h2.het.panel(plot.best.h2.het.smallpop.sub, c(2, 4),
                                               best.ylim, best.ylab, "Battle Deaths")

# Row titles. The title theme is attached to the titled (left) panel itself:
# in patchwork, `+` after a second plot modifies only that last plot, so a
# trailing theme() would never reach the panel that carries the title.
h2.title.theme <- theme(plot.title = element_text(hjust = 0, size = 11))

patch1.largepop <- (PLOT.attack.h2a.het.largepop.sub +
                      ggtitle("Effect of Concentrated Large Population (H2a)") +
                      h2.title.theme) +
  PLOT.best.h2a.het.largepop.sub

patch1.smallpop <- (PLOT.attack.h2a.het.smallpop.sub +
                      ggtitle("Effect of Concentrated Small Population (H2a)") +
                      h2.title.theme) +
  PLOT.best.h2a.het.smallpop.sub

patch2.largepop <- (PLOT.attack.h2b.het.largepop.sub +
                      ggtitle("Effect of Dispersed Large Population (H2b)") +
                      h2.title.theme) +
  PLOT.best.h2b.het.largepop.sub

patch2.smallpop <- (PLOT.attack.h2b.het.smallpop.sub +
                      ggtitle("Effect of Dispersed Small Population (H2b)") +
                      h2.title.theme) +
  PLOT.best.h2b.het.smallpop.sub

# Four stacked rows: concentrated large, concentrated small,
# dispersed large, dispersed small
patch1.largepop/
patch1.smallpop/
patch2.largepop/
patch2.smallpop

@

Figure~\ref{fig:Het_SizeSizeH2_sub} shows the heterogeneous effects for provinces hosting large versus small relative populations, conditional on refugee presence in other provinces of the same country-year. The top two plots compare large and small refugee presence when it is geographically concentrated, while the bottom two plots do so in geographically dispersed cases. It appears that the conditional risk-reduction effects arise in cases of large, concentrated refugee presence. However, there is a positive effect of small dispersed presence on conflict.

% H3: Effect of Concentrated/Dispersed Large vs. Small Refugee Population on Nighttime Lights 

<<HetNightLights_SiteSizeH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Outcome nlights_calib_mean: three lm_robust() fits sharing one set of controls.
## Only the leading regressor (and its matching *_rtb.other term) and the data
## set differ between them. excluded_mean and log(bdist3) remain left out.

## Actual presence, large-population indicator (gedpopafrica)
nlights_calib_mean.h2.het.largepop.sub <- lm_robust(
  nlights_calib_mean ~ large_ref_population + rtb.other + large_ref_population_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(nlights_calib_mean.h2.het.largepop.sub)

## Actual presence, small-population indicator (gedpopafrica)
nlights_calib_mean.h2.het.smallpop.sub <- lm_robust(
  nlights_calib_mean ~ small_ref_population + rtb.other + small_ref_population_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = gedpopafrica
)
# summary(nlights_calib_mean.h2.het.smallpop.sub)

## Placebo presence (plagedpopafrica); shared by the large and small comparisons
nlights_calib_mean.h2.het.sitesize.sub.pla <- lm_robust(
  nlights_calib_mean ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedpopafrica
)
# summary(nlights_calib_mean.h2.het.sitesize.sub.pla)

# Convert each fit into a plotting table with pred.lm.int.r()
nlights_calib_mean.h2.het.largepop.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.largepop.sub)
nlights_calib_mean.h2.het.smallpop.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.smallpop.sub)
nlights_calib_mean.h2.het.sitesize.sub.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.het.sitesize.sub.pla)

# Label the estimate type; the plots group and colour by this column
nlights_calib_mean.h2.het.largepop.sub.dplot[["Group"]] <- "Actual presence"
nlights_calib_mean.h2.het.smallpop.sub.dplot[["Group"]] <- "Actual presence"
nlights_calib_mean.h2.het.sitesize.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Stack actual rows on top of placebo rows, one table per population size
plot.nlights_calib_mean.h2.het.largepop.sub <- rbind(nlights_calib_mean.h2.het.largepop.sub.dplot,
                                                     nlights_calib_mean.h2.het.sitesize.sub.pla.dplot)
plot.nlights_calib_mean.h2.het.smallpop.sub <- rbind(nlights_calib_mean.h2.het.smallpop.sub.dplot,
                                                     nlights_calib_mean.h2.het.sitesize.sub.pla.dplot)

@

<<Het_SiteSizeH3_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect  of refugee presence in provinces hosting relatively large versus small refugee populations (heterogeneous effects analysis) on nighttime lights 2010--2015 in Africa (black) compared to their respective placebo estimates (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# Coefficient plots for nighttime lights by relative population size,
# actual versus placebo presence.

# Defined here so the chunk does not depend on an earlier plotting chunk
# having created `pd`; the dodge width is 0.5.
pd <- position_dodge(0.5)

# Draw one nighttime-lights panel (point estimate plus CI bar per Group) from
# the selected rows of a stacked actual/placebo table.
#   est   data frame with columns outcome, Means, CIL, CIU and Group
#   rows  row indices of `est` to plot
# Returns the ggplot object; no legend is drawn.
h3.het.panel <- function(est, rows) {
  ggplot(est[rows, ],
         aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU are passed through as.character() first, as in the original code,
    # so that factor-coded bounds are converted by label rather than by level index
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(-.006, .006) +
    ylab("Change in Predicted Lights") +
    xlab("") +
    scale_x_discrete(labels = c("Average Nighttime Lights")) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated FALSE
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8))
}

# Rows c(1,3) feed the h2a (concentrated) panels and rows c(2,4) the h2b
# (dispersed) panels. This assumes each stacked table holds two actual rows
# followed by two placebo rows -- TODO confirm against pred.lm.int.r().

# LARGE POP: h2a is concentrated, h2b is dispersed
PLOT.nlights_calib_mean.h2a.het.largepop.sub <-
  h3.het.panel(plot.nlights_calib_mean.h2.het.largepop.sub, c(1, 3))
PLOT.nlights_calib_mean.h2b.het.largepop.sub <-
  h3.het.panel(plot.nlights_calib_mean.h2.het.largepop.sub, c(2, 4))

# SMALL POP: h2a is concentrated, h2b is dispersed
PLOT.nlights_calib_mean.h2a.het.smallpop.sub <-
  h3.het.panel(plot.nlights_calib_mean.h2.het.smallpop.sub, c(1, 3))
PLOT.nlights_calib_mean.h2b.het.smallpop.sub <-
  h3.het.panel(plot.nlights_calib_mean.h2.het.smallpop.sub, c(2, 4))

# Titled panels. Note that the patch*.largepop / patch*.smallpop names are
# reused here and overwrite any objects of the same name created earlier.
h3.title.theme <- theme(plot.title = element_text(hjust = 0, size = 11))

patch1.largepop <- PLOT.nlights_calib_mean.h2a.het.largepop.sub +
  ggtitle("Effect of Concentrated Large Population (H3a)") +
  h3.title.theme

patch1.smallpop <- PLOT.nlights_calib_mean.h2a.het.smallpop.sub +
  ggtitle("Effect of Concentrated Small Population (H3a)") +
  h3.title.theme

patch2.largepop <- PLOT.nlights_calib_mean.h2b.het.largepop.sub +
  ggtitle("Effect of Dispersed Large Population (H3b)") +
  h3.title.theme

patch2.smallpop <- PLOT.nlights_calib_mean.h2b.het.smallpop.sub +
  ggtitle("Effect of Dispersed Small Population (H3b)") +
  h3.title.theme

# Top row: concentrated (large, small); bottom row: dispersed (large, small)
(patch1.largepop + patch1.smallpop) /
(patch2.largepop + patch2.smallpop)

@

Figure~\ref{fig:Het_SiteSizeH3_sub} shows the heterogeneous effects on nighttime lights for provinces hosting large versus small relative populations, conditional on refugee presence in other provinces of the same country-year. The top two plots compare large and small refugee presence when it is geographically concentrated, while the bottom two plots do so in geographically dispersed cases. We cannot discern differential effects by relative population size; note that the placebo estimates for concentrated cases do not perform well.


%%% CAMPS VS. SETTLEMENTS
\newpage
\subsection{Heterogeneous Effects of Formal Camps versus Informal Settlements}
\label{SIsec:hte_campssettlements}

We assess whether refugee camps or settlements have different effects on conflict onset or incidence. According to our discussions with UNHCR officials, whether a refugee site is designated as a ``camp'' or a ``settlement'' is largely a political decision by the host state to recognize it formally, not a decision by the UNHCR. While camps are generally associated with more state control and international recognition and settlements are informal areas of refugee communities, there is no clear delineation between the two based on realities on the ground such as population sizes or levels of aid.\footnote{Discussion conducted with the Field Information and Coordination Support Section (FICSS) of UNHCR on December 30, 2013 by teleconference.} In fact, UNHCR operational protocols refer to camps and settlements together without explicitly differentiating the two.\footnote{\url{www.unhcr.org/uk/448d6c122.pdf}} Nevertheless, since host states choose to recognize ``settlements'' as informal sites, they might understand that these areas are less contentious with the local population than sites that they call ``camps.'' Thus, we hypothesize that provinces with refugee settlements might experience less conflict onset and incidence than provinces with camps.

Figure \ref{fig:CampsSettlementsPlot_sub} descriptively shows the number of provinces with only formal refugee camps (dotted line), informal refugee settlements (dashed line), and both (solid line) over time. Throughout the data of province-years, of all \Sexpr{sum(geddatasub$rtb)} provinces with refugee sites, \Sexpr{round(sum(geddatasub[geddatasub$rsb == 0,]$rcb)/sum(geddatasub$rtb), 4)*100}\% are provinces with only refugee camps, \Sexpr{round(sum(geddatasub[geddatasub$rcb == 0,]$rsb)/sum(geddatasub$rtb), 4)*100}\% are provinces with only refugee settlements, and only \Sexpr{round(sum(geddatasub[geddatasub$rcb == 1,]$rsb)/sum(geddatasub$rtb), 4)*100}\% are provinces with both. 

<<CampsSettlementsPlot_sub, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7.5, fig.height = 3.5, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the number of provinces with only formal refugee camps (dotted line), informal refugee settlements (dashed line), and both (solid line) over time. Throughout the main data of province-years, of all ", sum(geddatasub$rtb)," provinces with refugee sites, ", round(sum(geddatasub[geddatasub$rsb == 0,]$rcb)/sum(geddatasub$rtb), 4)*100,"$\\%$ are provinces with only refugee camps, ", round(sum(geddatasub[geddatasub$rcb == 0,]$rsb)/sum(geddatasub$rtb), 4)*100,"$\\%$ are provinces with only refugee settlements, and ", round(sum(geddatasub[geddatasub$rcb == 1,]$rsb)/sum(geddatasub$rtb), 4)*100,"$\\%$ are provinces with both.", sep = "")>>=

# Yearly number of provinces hosting only camps, only settlements, or both.
rtb.no <- table(geddatasub$rtb, geddatasub$year) # any refugee site; not used further in this chunk

rcb.no <- table(geddatasub[geddatasub$rsb == 0,]$rcb, geddatasub[geddatasub$rsb == 0,]$year) #only camps

rsb.no <- table(geddatasub[geddatasub$rcb == 0,]$rsb, geddatasub[geddatasub$rcb == 0,]$year) #only settlements

rcsb.no <- table(geddatasub[geddatasub$rcb == 1,]$rsb, geddatasub[geddatasub$rcb == 1,]$year) #both

# Legend labels in the order used by the caption, each tied to its line type
# by name. Without an explicit order, a character `group` column is sorted
# alphabetically by ggplot2, which would put "both" first and draw it dotted,
# contradicting the caption (camps dotted, settlements dashed, both solid).
site.groups <- c("Provinces with only Refugee Camps",
                 "Provinces with only Refugee Settlements",
                 "Provinces with both Camps and Settlements")
site.linetypes <- setNames(c("dotted", "dashed", "solid"), site.groups)

# Turn one indicator-by-year table into a long data frame for plotting.
#   tab    two-way table (indicator value x year)
#   label  legend label for this series
# Row 2 is taken as the count of provinces with indicator == 1; this assumes
# the indicator takes both values 0 and 1 in the subset -- TODO confirm.
# Years are read from the table's column names instead of a hard-coded range,
# so they always line up with the counts.
yearly.counts <- function(tab, label) {
  data.frame(X1 = as.numeric(tab[2, ]),
             year = as.numeric(colnames(tab)),
             group = label,
             stringsAsFactors = FALSE)
}

rcbrsbplot <- rbind(yearly.counts(rcb.no, site.groups[1]),
                    yearly.counts(rsb.no, site.groups[2]),
                    yearly.counts(rcsb.no, site.groups[3]))

rcbrsbplot$group <- factor(rcbrsbplot$group, levels = site.groups)

# plot
campssettlements_plot <- ggplot(rcbrsbplot, aes(x=year, y=X1, group=group)) +
  geom_line(aes(linetype=group)) +
  ylab("Number of Provinces with only Refugee\n Camps, only Settlements, or both") +
  xlab("Year") +
  ylim(0, 100) +
  scale_linetype_manual(values=site.linetypes) +
  theme(panel.background = element_blank(),
        legend.title = element_blank(),
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        legend.justification = c(0, 1), 
        legend.position= c(.01, .99),
        legend.box = "horizontal",
        legend.direction = "vertical", 
        legend.key = element_rect(colour = "transparent", fill = "white"),
        axis.text.x  = element_text(vjust=0.5, hjust = .7, size=8)
        ) 

campssettlements_plot

@

Figure~\ref{fig:Het_CampsSettH1} shows the heterogeneous effects of the presence of refugee camps (top plot) versus settlements (bottom plot) on the main conflict outcomes. All plots confirm that, regardless of the type of refugee site, refugee presence does not increase conflict. In fact, there is a negative effect of camps on onset and incidence, which is consistent with our theoretical predictions of increased development in these cases.

% H1: Effect of Refugee Camps vs. Settlements on Conflict 
<<HetOnset_CampSettH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Full-data onset models: logit fits via bayesglm() on rows with year < 2009.
# If perfect separation, because too many countries with no conflict in the full dataset, uninformative priors are chosen based on the stan-dev Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
# All four fits share one covariate list; excluded_mean is currently disabled as a covariate.

## Treatment model 1, with full data
# Camps (rcb) are the term of interest; rsb controls for settlements.
onset.h1.full.hetcamp <- bayesglm(
  onset.n ~ rcb + rcb.other + rsb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(onset.h1.full.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
onset.h1.full.hetsett <- bayesglm(
  onset.n ~ rsb + rsb.other + rcb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(onset.h1.full.hetsett)

## Placebo model
# Same specifications with rcb.placebo / rsb.placebo as the leading term,
# fitted on the plagedrcb / plagedrsb data sets respectively.
onset.h1.full.hetcamp.pla <- bayesglm(
  onset.n ~ rcb.placebo + rcb.other + rsb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcb[plagedrcb$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(onset.h1.full.hetcamp.pla)

onset.h1.full.hetsett.pla <- bayesglm(
  onset.n ~ rsb.placebo + rsb.other + rcb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsb[plagedrsb$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(onset.h1.full.hetsett.pla)

## Treatment model 3, with subset data
# Same onset specifications as the full-data fits, estimated on the *sub data sets
# (rows with year < 2009). The custom prior (prior.scale = 1, prior.df = 3) is
# disabled here, so bayesglm() uses its default priors. excluded_mean stays disabled.

# Camps (rcb) are the term of interest; rsb controls for settlements.
onset.h1.sub.hetcamp <- bayesglm(
  onset.n ~ rcb + rcb.other + rsb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(onset.h1.sub.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
onset.h1.sub.hetsett <- bayesglm(
  onset.n ~ rsb + rsb.other + rcb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(onset.h1.sub.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcbsub / plagedrsbsub.
onset.h1.sub.hetcamp.pla <- bayesglm(
  onset.n ~ rcb.placebo + rcb.other + rsb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcbsub[plagedrcbsub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(onset.h1.sub.hetcamp.pla)

onset.h1.sub.hetsett.pla <- bayesglm(
  onset.n ~ rsb.placebo + rsb.other + rcb + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsbsub[plagedrsbsub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(onset.h1.sub.hetsett.pla)

# Build the onset plotting frames: pass each fitted model through pred.bi(), tag the
# result in `Group` with the type of estimate, then stack actual and placebo rows.
# NOTE(review): pred.bi() is defined outside this chunk; its result is assumed to be
# a data frame because it is rbind-ed below -- confirm.

# -- full data --
onset.h1.full.hetcamp.dplot <- pred.bi(onset.h1.full.hetcamp)
onset.h1.full.hetcamp.dplot[["Group"]] <- "Actual presence"

onset.h1.full.hetsett.dplot <- pred.bi(onset.h1.full.hetsett)
onset.h1.full.hetsett.dplot[["Group"]] <- "Actual presence"

onset.h1.full.hetcamp.pla.dplot <- pred.bi(onset.h1.full.hetcamp.pla)
onset.h1.full.hetcamp.pla.dplot[["Group"]] <- "Placebo presence"

onset.h1.full.hetsett.pla.dplot <- pred.bi(onset.h1.full.hetsett.pla)
onset.h1.full.hetsett.pla.dplot[["Group"]] <- "Placebo presence"

# One frame per site type for ggplot: actual rows first, placebo rows second
plot.onset.h1.full.hetcamp <- rbind(onset.h1.full.hetcamp.dplot, onset.h1.full.hetcamp.pla.dplot)
plot.onset.h1.full.hetsett <- rbind(onset.h1.full.hetsett.dplot, onset.h1.full.hetsett.pla.dplot)

# -- subset data --
onset.h1.sub.hetcamp.dplot <- pred.bi(onset.h1.sub.hetcamp)
onset.h1.sub.hetcamp.dplot[["Group"]] <- "Actual presence"

onset.h1.sub.hetsett.dplot <- pred.bi(onset.h1.sub.hetsett)
onset.h1.sub.hetsett.dplot[["Group"]] <- "Actual presence"

onset.h1.sub.hetcamp.pla.dplot <- pred.bi(onset.h1.sub.hetcamp.pla)
onset.h1.sub.hetcamp.pla.dplot[["Group"]] <- "Placebo presence"

onset.h1.sub.hetsett.pla.dplot <- pred.bi(onset.h1.sub.hetsett.pla)
onset.h1.sub.hetsett.pla.dplot[["Group"]] <- "Placebo presence"

plot.onset.h1.sub.hetcamp <- rbind(onset.h1.sub.hetcamp.dplot, onset.h1.sub.hetcamp.pla.dplot)
plot.onset.h1.sub.hetsett <- rbind(onset.h1.sub.hetsett.dplot, onset.h1.sub.hetsett.pla.dplot)

# Disabled: save each model and plot frame as "<object name>.Rdata"
# for (obj in c("onset.h1.full.hetcamp", "onset.h1.full.hetcamp.pla", "plot.onset.h1.full.hetcamp",
#               "onset.h1.full.hetsett", "onset.h1.full.hetsett.pla", "plot.onset.h1.full.hetsett",
#               "onset.h1.sub.hetcamp", "onset.h1.sub.hetcamp.pla", "plot.onset.h1.sub.hetcamp",
#               "onset.h1.sub.hetsett", "onset.h1.sub.hetsett.pla", "plot.onset.h1.sub.hetsett")) {
#   save(list = obj, file = paste0(obj, ".Rdata"))
# }

@

<<HetIncidence_CampSettH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Full-data incidence models: logit fits via bayesglm() on rows with year < 2009.
# If perfect separation, because too many countries with no conflict in the full dataset, uninformative priors are chosen based on the stan-dev Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
# All four fits share one covariate list; excluded_mean is currently disabled as a covariate.

## Treatment model 1, with full data
# Camps (rcb) are the term of interest; rsb controls for settlements.
incidence.h1.full.hetcamp <- bayesglm(
  incidence ~ rcb + rcb.other + rsb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(incidence.h1.full.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
incidence.h1.full.hetsett <- bayesglm(
  incidence ~ rsb + rsb.other + rcb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata[geddata$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(incidence.h1.full.hetsett)

## Placebo model
# Same specifications with rcb.placebo / rsb.placebo as the leading term,
# fitted on the plagedrcb / plagedrsb data sets respectively.
incidence.h1.full.hetcamp.pla <- bayesglm(
  incidence ~ rcb.placebo + rcb.other + rsb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcb[plagedrcb$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(incidence.h1.full.hetcamp.pla)

incidence.h1.full.hetsett.pla <- bayesglm(
  incidence ~ rsb.placebo + rsb.other + rcb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsb[plagedrsb$year < 2009, ],
  prior.scale = 1,
  prior.df = 3,
  family = binomial(link = "logit")
)
# summary(incidence.h1.full.hetsett.pla)

## Treatment model 3, with subset data
# Same incidence specifications as the full-data fits, estimated on the *sub data sets
# (rows with year < 2009). The custom prior (prior.scale = 1, prior.df = 3) is
# disabled here, so bayesglm() uses its default priors. excluded_mean stays disabled.

# Camps (rcb) are the term of interest; rsb controls for settlements.
incidence.h1.sub.hetcamp <- bayesglm(
  incidence ~ rcb + rcb.other + rsb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(incidence.h1.sub.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
incidence.h1.sub.hetsett <- bayesglm(
  incidence ~ rsb + rsb.other + rcb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub[geddatasub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(incidence.h1.sub.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcbsub / plagedrsbsub.
incidence.h1.sub.hetcamp.pla <- bayesglm(
  incidence ~ rcb.placebo + rcb.other + rsb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcbsub[plagedrcbsub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(incidence.h1.sub.hetcamp.pla)

incidence.h1.sub.hetsett.pla <- bayesglm(
  incidence ~ rsb.placebo + rsb.other + rcb + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsbsub[plagedrsbsub$year < 2009, ],
  family = binomial(link = "logit")
)
# summary(incidence.h1.sub.hetsett.pla)

# Build the incidence plotting frames: pass each fitted model through pred.bi(), tag the
# result in `Group` with the type of estimate, then stack actual and placebo rows.
# NOTE(review): pred.bi() is defined outside this chunk; its result is assumed to be
# a data frame because it is rbind-ed below -- confirm.

# -- full data --
incidence.h1.full.hetcamp.dplot <- pred.bi(incidence.h1.full.hetcamp)
incidence.h1.full.hetcamp.dplot[["Group"]] <- "Actual presence"

incidence.h1.full.hetsett.dplot <- pred.bi(incidence.h1.full.hetsett)
incidence.h1.full.hetsett.dplot[["Group"]] <- "Actual presence"

incidence.h1.full.hetcamp.pla.dplot <- pred.bi(incidence.h1.full.hetcamp.pla)
incidence.h1.full.hetcamp.pla.dplot[["Group"]] <- "Placebo presence"

incidence.h1.full.hetsett.pla.dplot <- pred.bi(incidence.h1.full.hetsett.pla)
incidence.h1.full.hetsett.pla.dplot[["Group"]] <- "Placebo presence"

# One frame per site type for ggplot: actual rows first, placebo rows second
plot.incidence.h1.full.hetcamp <- rbind(incidence.h1.full.hetcamp.dplot, incidence.h1.full.hetcamp.pla.dplot)
plot.incidence.h1.full.hetsett <- rbind(incidence.h1.full.hetsett.dplot, incidence.h1.full.hetsett.pla.dplot)

# -- subset data --
incidence.h1.sub.hetcamp.dplot <- pred.bi(incidence.h1.sub.hetcamp)
incidence.h1.sub.hetcamp.dplot[["Group"]] <- "Actual presence"

incidence.h1.sub.hetsett.dplot <- pred.bi(incidence.h1.sub.hetsett)
incidence.h1.sub.hetsett.dplot[["Group"]] <- "Actual presence"

incidence.h1.sub.hetcamp.pla.dplot <- pred.bi(incidence.h1.sub.hetcamp.pla)
incidence.h1.sub.hetcamp.pla.dplot[["Group"]] <- "Placebo presence"

incidence.h1.sub.hetsett.pla.dplot <- pred.bi(incidence.h1.sub.hetsett.pla)
incidence.h1.sub.hetsett.pla.dplot[["Group"]] <- "Placebo presence"

plot.incidence.h1.sub.hetcamp <- rbind(incidence.h1.sub.hetcamp.dplot, incidence.h1.sub.hetcamp.pla.dplot)
plot.incidence.h1.sub.hetsett <- rbind(incidence.h1.sub.hetsett.dplot, incidence.h1.sub.hetsett.pla.dplot)

# Disabled: save each model and plot frame as "<object name>.Rdata"
# for (obj in c("incidence.h1.full.hetcamp", "incidence.h1.full.hetcamp.pla", "plot.incidence.h1.full.hetcamp",
#               "incidence.h1.full.hetsett", "incidence.h1.full.hetsett.pla", "plot.incidence.h1.full.hetsett",
#               "incidence.h1.sub.hetcamp", "incidence.h1.sub.hetcamp.pla", "plot.incidence.h1.sub.hetcamp",
#               "incidence.h1.sub.hetsett", "incidence.h1.sub.hetsett.pla", "plot.incidence.h1.sub.hetsett")) {
#   save(list = obj, file = paste0(obj, ".Rdata"))
# }

@

<<HetAttack_CampSettH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 1, with full data
# Linear models for `attack`, fitted with lm_robust() on every row of the data set
# (no year filter and no se_type/clusters arguments are supplied).
# excluded_mean is currently disabled as a covariate.

# Camps (rcb) are the term of interest; rsb controls for settlements.
attack.h1.full.hetcamp <- lm_robust(
  attack ~ rcb + rcb.other + rsb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack.h1.full.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
attack.h1.full.hetsett <- lm_robust(
  attack ~ rsb + rsb.other + rcb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(attack.h1.full.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcb / plagedrsb.
attack.h1.full.hetcamp.pla <- lm_robust(
  attack ~ rcb.placebo + rcb.other + rsb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcb
)
# summary(attack.h1.full.hetcamp.pla)

attack.h1.full.hetsett.pla <- lm_robust(
  attack ~ rsb.placebo + rsb.other + rcb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsb
)
# summary(attack.h1.full.hetsett.pla)


## Treatment model 3, with subset data
# Same `attack` specifications as the full-data fits, estimated with lm_robust()
# on the *sub data sets. excluded_mean is currently disabled as a covariate.

# Camps (rcb) are the term of interest; rsb controls for settlements.
attack.h1.sub.hetcamp <- lm_robust(
  attack ~ rcb + rcb.other + rsb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack.h1.sub.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
attack.h1.sub.hetsett <- lm_robust(
  attack ~ rsb + rsb.other + rcb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(attack.h1.sub.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcbsub / plagedrsbsub.
attack.h1.sub.hetcamp.pla <- lm_robust(
  attack ~ rcb.placebo + rcb.other + rsb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcbsub
)
# summary(attack.h1.sub.hetcamp.pla)

attack.h1.sub.hetsett.pla <- lm_robust(
  attack ~ rsb.placebo + rsb.other + rcb + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsbsub
)
# summary(attack.h1.sub.hetsett.pla)


# Extract the coefficient of interest from each `attack` model for plotting.
# These are linear-model coefficient rows from tidy(), not predicted probabilities.
# Each row is selected on the tidied frame's own `term` column, so the filter cannot
# fall out of step with the rows of the frame it indexes and does not rely on the
# fitted object exposing a `$term` element.
# `Group` labels the type of estimate; actual and placebo rows are then stacked.

#full data
attack.h1.full.hetcamp.dplot <- tidy(attack.h1.full.hetcamp)
attack.h1.full.hetcamp.dplot <- attack.h1.full.hetcamp.dplot[attack.h1.full.hetcamp.dplot$term == "rcb", ]
attack.h1.full.hetcamp.dplot$Group <- "Actual presence" #label the type of estimate

attack.h1.full.hetsett.dplot <- tidy(attack.h1.full.hetsett)
attack.h1.full.hetsett.dplot <- attack.h1.full.hetsett.dplot[attack.h1.full.hetsett.dplot$term == "rsb", ]
attack.h1.full.hetsett.dplot$Group <- "Actual presence" #label the type of estimate

attack.h1.full.hetcamp.pla.dplot <- tidy(attack.h1.full.hetcamp.pla)
attack.h1.full.hetcamp.pla.dplot <- attack.h1.full.hetcamp.pla.dplot[attack.h1.full.hetcamp.pla.dplot$term == "rcb.placebo", ]
attack.h1.full.hetcamp.pla.dplot$Group <- "Placebo presence"

attack.h1.full.hetsett.pla.dplot <- tidy(attack.h1.full.hetsett.pla)
attack.h1.full.hetsett.pla.dplot <- attack.h1.full.hetsett.pla.dplot[attack.h1.full.hetsett.pla.dplot$term == "rsb.placebo", ]
attack.h1.full.hetsett.pla.dplot$Group <- "Placebo presence"

plot.attack.h1.full.hetcamp <- rbind(attack.h1.full.hetcamp.dplot, #combine for ggplot
                             attack.h1.full.hetcamp.pla.dplot)

plot.attack.h1.full.hetsett <- rbind(attack.h1.full.hetsett.dplot, #combine for ggplot
                             attack.h1.full.hetsett.pla.dplot)

#subset data
attack.h1.sub.hetcamp.dplot <- tidy(attack.h1.sub.hetcamp)
attack.h1.sub.hetcamp.dplot <- attack.h1.sub.hetcamp.dplot[attack.h1.sub.hetcamp.dplot$term == "rcb", ]
attack.h1.sub.hetcamp.dplot$Group <- "Actual presence" #label the type of estimate

attack.h1.sub.hetsett.dplot <- tidy(attack.h1.sub.hetsett)
attack.h1.sub.hetsett.dplot <- attack.h1.sub.hetsett.dplot[attack.h1.sub.hetsett.dplot$term == "rsb", ]
attack.h1.sub.hetsett.dplot$Group <- "Actual presence" #label the type of estimate

attack.h1.sub.hetcamp.pla.dplot <- tidy(attack.h1.sub.hetcamp.pla)
attack.h1.sub.hetcamp.pla.dplot <- attack.h1.sub.hetcamp.pla.dplot[attack.h1.sub.hetcamp.pla.dplot$term == "rcb.placebo", ]
attack.h1.sub.hetcamp.pla.dplot$Group <- "Placebo presence"

attack.h1.sub.hetsett.pla.dplot <- tidy(attack.h1.sub.hetsett.pla)
attack.h1.sub.hetsett.pla.dplot <- attack.h1.sub.hetsett.pla.dplot[attack.h1.sub.hetsett.pla.dplot$term == "rsb.placebo", ]
attack.h1.sub.hetsett.pla.dplot$Group <- "Placebo presence"

plot.attack.h1.sub.hetcamp <- rbind(attack.h1.sub.hetcamp.dplot, #combine for ggplot
                             attack.h1.sub.hetcamp.pla.dplot)

plot.attack.h1.sub.hetsett <- rbind(attack.h1.sub.hetsett.dplot, #combine for ggplot
                             attack.h1.sub.hetsett.pla.dplot)

@

<<HetBattleDeath_CampSettH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 1, with full data
# Linear models for log(best + 1), fitted with lm_robust() on every row of the data
# set (no year filter and no se_type/clusters arguments are supplied). The lagged
# outcome enters as log(best_1 + 1). excluded_mean is currently disabled as a covariate.

# Camps (rcb) are the term of interest; rsb controls for settlements.
best.h1.full.hetcamp <- lm_robust(
  log(best + 1) ~ rcb + rcb.other + rsb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best.h1.full.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
best.h1.full.hetsett <- lm_robust(
  log(best + 1) ~ rsb + rsb.other + rcb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddata
)
# summary(best.h1.full.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcb / plagedrsb.
best.h1.full.hetcamp.pla <- lm_robust(
  log(best + 1) ~ rcb.placebo + rcb.other + rsb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcb
)
# summary(best.h1.full.hetcamp.pla)

best.h1.full.hetsett.pla <- lm_robust(
  log(best + 1) ~ rsb.placebo + rsb.other + rcb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsb
)
# summary(best.h1.full.hetsett.pla)

## Treatment model 3, with subset data
# Same log(best + 1) specifications as the full-data fits, estimated with lm_robust()
# on the *sub data sets. excluded_mean is currently disabled as a covariate.

# Camps (rcb) are the term of interest; rsb controls for settlements.
best.h1.sub.hetcamp <- lm_robust(
  log(best + 1) ~ rcb + rcb.other + rsb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best.h1.sub.hetcamp)

# Settlements (rsb) are the term of interest; rcb controls for camps.
best.h1.sub.hetsett <- lm_robust(
  log(best + 1) ~ rsb + rsb.other + rcb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = geddatasub
)
# summary(best.h1.sub.hetsett)

## Placebo model
# rcb.placebo / rsb.placebo lead the formula; data come from plagedrcbsub / plagedrsbsub.
best.h1.sub.hetcamp.pla <- lm_robust(
  log(best + 1) ~ rcb.placebo + rcb.other + rsb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrcbsub
)
# summary(best.h1.sub.hetcamp.pla)

best.h1.sub.hetsett.pla <- lm_robust(
  log(best + 1) ~ rsb.placebo + rsb.other + rcb + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN + log_bdist2 + log_capdist +
    idpb + as.factor(Country) + as.factor(year),
  data = plagedrsbsub
)
# summary(best.h1.sub.hetsett.pla)


# Extract the coefficient of interest from each log(best + 1) model for plotting.
# These are linear-model coefficient rows from tidy(), not predicted probabilities.
# Each row is selected on the tidied frame's own `term` column, so the filter cannot
# fall out of step with the rows of the frame it indexes and does not rely on the
# fitted object exposing a `$term` element.
# `Group` labels the type of estimate; actual and placebo rows are then stacked.

#full data
best.h1.full.hetcamp.dplot <- tidy(best.h1.full.hetcamp)
best.h1.full.hetcamp.dplot <- best.h1.full.hetcamp.dplot[best.h1.full.hetcamp.dplot$term == "rcb", ]
best.h1.full.hetcamp.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.full.hetsett.dplot <- tidy(best.h1.full.hetsett)
best.h1.full.hetsett.dplot <- best.h1.full.hetsett.dplot[best.h1.full.hetsett.dplot$term == "rsb", ]
best.h1.full.hetsett.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.full.hetcamp.pla.dplot <- tidy(best.h1.full.hetcamp.pla)
best.h1.full.hetcamp.pla.dplot <- best.h1.full.hetcamp.pla.dplot[best.h1.full.hetcamp.pla.dplot$term == "rcb.placebo", ]
best.h1.full.hetcamp.pla.dplot$Group <- "Placebo presence"

best.h1.full.hetsett.pla.dplot <- tidy(best.h1.full.hetsett.pla)
best.h1.full.hetsett.pla.dplot <- best.h1.full.hetsett.pla.dplot[best.h1.full.hetsett.pla.dplot$term == "rsb.placebo", ]
best.h1.full.hetsett.pla.dplot$Group <- "Placebo presence"

plot.best.h1.full.hetcamp <- rbind(best.h1.full.hetcamp.dplot, #combine for ggplot
                             best.h1.full.hetcamp.pla.dplot)

plot.best.h1.full.hetsett <- rbind(best.h1.full.hetsett.dplot, #combine for ggplot
                             best.h1.full.hetsett.pla.dplot)

#subset data
best.h1.sub.hetcamp.dplot <- tidy(best.h1.sub.hetcamp)
best.h1.sub.hetcamp.dplot <- best.h1.sub.hetcamp.dplot[best.h1.sub.hetcamp.dplot$term == "rcb", ]
best.h1.sub.hetcamp.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.sub.hetsett.dplot <- tidy(best.h1.sub.hetsett)
best.h1.sub.hetsett.dplot <- best.h1.sub.hetsett.dplot[best.h1.sub.hetsett.dplot$term == "rsb", ]
best.h1.sub.hetsett.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.sub.hetcamp.pla.dplot <- tidy(best.h1.sub.hetcamp.pla)
best.h1.sub.hetcamp.pla.dplot <- best.h1.sub.hetcamp.pla.dplot[best.h1.sub.hetcamp.pla.dplot$term == "rcb.placebo", ]
best.h1.sub.hetcamp.pla.dplot$Group <- "Placebo presence"

best.h1.sub.hetsett.pla.dplot <- tidy(best.h1.sub.hetsett.pla)
best.h1.sub.hetsett.pla.dplot <- best.h1.sub.hetsett.pla.dplot[best.h1.sub.hetsett.pla.dplot$term == "rsb.placebo", ]
best.h1.sub.hetsett.pla.dplot$Group <- "Placebo presence"

plot.best.h1.sub.hetcamp <- rbind(best.h1.sub.hetcamp.dplot, #combine for ggplot
                             best.h1.sub.hetcamp.pla.dplot)

plot.best.h1.sub.hetsett <- rbind(best.h1.sub.hetsett.dplot, #combine for ggplot
                             best.h1.sub.hetsett.pla.dplot)


@

<<Het_CampsSettH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee camps versus settlements (heterogeneous effects analysis) on conflict outcomes (black), compared to their respective placebo estimates, i.e. effect of future refugee camps vs. settlements (gray). All point estimates include 95$\\%$ CIs.")>>=
 
# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h1.sub.hetcamp.Rdata")
# load("plot.onset.h1.sub.hetsett.Rdata")
# load("plot.incidence.h1.sub.hetcamp.Rdata")
# load("plot.incidence.h1.sub.hetsett.Rdata")

# Tag each predicted-probability table with the outcome it belongs to, so the
# onset and incidence estimates can share one x-axis once they are stacked.
plot.onset.h1.sub.hetcamp$outcome <- "onset"
plot.onset.h1.sub.hetsett$outcome <- "onset"
plot.incidence.h1.sub.hetcamp$outcome <- "incidence"
plot.incidence.h1.sub.hetsett$outcome <- "incidence"

# Dodge the actual and placebo estimates apart (dodging width 0.5)
pd <- position_dodge(0.5)

# NOTE(review): `size` on geom_errorbar()/element_rect() is kept for
# compatibility with older ggplot2; ggplot2 >= 3.4.0 prefers `linewidth`.

# Panel for the two binary outcomes (onset, incidence).
#   onset.df, incidence.df: plot tables with columns Plot, Means, CIL, CIU,
#                           Group and outcome; only rows with Plot == 3 are drawn.
# Returns a ggplot with point estimates and error bars for actual (black) vs.
# placebo (gray) presence, with the legend inside the top-right corner.
het.h1.prob.panel <- function(onset.df, incidence.df) {
  bind_rows(onset.df[onset.df$Plot == 3, ],
            incidence.df[incidence.df$Plot == 3, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset")) %>% # onset first on the x-axis
    ggplot(aes(x = outcome, y = Means,
               group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylab("Change in Predicted Probability") +
    xlab("") +
    scale_x_discrete(labels = c("Onset",
                                "Incidence")) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.05, .05)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated guides(shape = FALSE)
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8),
          legend.title = element_blank(),
          legend.justification = c(1, 1),
          legend.position = c(.99, .99),
          legend.box = "horizontal",
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white")
          )
}

# Panel for one intensity outcome (violent events or battle deaths).
#   est.df:  tidy() coefficient table with columns outcome, estimate,
#            conf.low, conf.high and Group
#   y.range: length-2 numeric vector passed to ylim()
#   y.label: y-axis title
#   x.label: label shown for the single x-axis category
# Returns a ggplot without a legend (the legend lives on the probability panel).
het.h1.coef.panel <- function(est.df, y.range, y.label, x.label) {
  ggplot(est.df,
         aes(x = outcome, y = estimate,
             group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.range[1], y.range[2]) +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = c(x.label)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated guides(shape = FALSE)
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)
          )
}

# Onset / incidence panels
PLOT.onsetincidence.h1.sub.hetcamp <- het.h1.prob.panel(plot.onset.h1.sub.hetcamp,
                                                        plot.incidence.h1.sub.hetcamp)

PLOT.onsetincidence.h1.sub.hetsett <- het.h1.prob.panel(plot.onset.h1.sub.hetsett,
                                                        plot.incidence.h1.sub.hetsett)

# Violent-events panels
PLOT.attack.h1.sub.hetcamp <- het.h1.coef.panel(plot.attack.h1.sub.hetcamp,
                                                y.range = c(-1.7, 1.7),
                                                y.label = "Change in Predicted Number",
                                                x.label = "Violent Events")

PLOT.attack.h1.sub.hetsett <- het.h1.coef.panel(plot.attack.h1.sub.hetsett,
                                                y.range = c(-1.7, 1.7),
                                                y.label = "Change in Predicted Number",
                                                x.label = "Violent Events")

# Battle-deaths panels (outcome is logged)
PLOT.best.h1.sub.hetcamp <- het.h1.coef.panel(plot.best.h1.sub.hetcamp,
                                              y.range = c(-.3, .3),
                                              y.label = "Change in Predicted Number (logged)",
                                              x.label = "Battle Deaths")

PLOT.best.h1.sub.hetsett <- het.h1.coef.panel(plot.best.h1.sub.hetsett,
                                              y.range = c(-.3, .3),
                                              y.label = "Change in Predicted Number (logged)",
                                              x.label = "Battle Deaths")


# Compose the figure: camps on the top row, settlements on the bottom row.
# The negative hjust values push each row title to the left of its first panel.
((PLOT.onsetincidence.h1.sub.hetcamp + ggtitle('Effect of Refugee Camps (H1)') +
  theme(plot.title = element_text(hjust= -.42))) +
  (PLOT.attack.h1.sub.hetcamp + PLOT.best.h1.sub.hetcamp)) /
((PLOT.onsetincidence.h1.sub.hetsett + ggtitle('Effect of Refugee Settlements (H1)') +
  theme(plot.title = element_text(hjust= -.6))) +
  (PLOT.attack.h1.sub.hetsett + PLOT.best.h1.sub.hetsett))

@

Figure~\ref{fig:Het_CampsSettH2} shows the heterogeneous effects of the presence of refugee camps versus settlements, respectively, conditional on refugee camps or settlements in other provinces of the same country-year. The top two plots compare the presence of camps and settlements in geographically concentrated cases, while the bottom two plots compare the presence of camps and settlements in geographically dispersed cases. All plots confirm that there is no effect of refugee presence even if there are refugee sites, whether camps or settlements, elsewhere (dispersed) in the country. Additionally, with respect to the \emph{conditional risk reduction effect} in the case of concentrated refugee presence in the host country, the negative effects of camp presence are on conflict onset and incidence, while the negative effects of settlement presence are on the conflict intensity outcomes, namely violent events and battle deaths.

% H2: Effect of Concentrated/Dispersed Refugee Camps vs. Settlements on Conflict 
<<HetOnset_CampSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# If perfect separation, because too many countries with no conflict in the full dataset, uninformative priors are chosen based on the stan-dev Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

## Treatment model 1, with full data
# Logit model (bayesglm) of conflict onset on camp presence (rcb), rcb.other
# and rcb_rcb.other (presumably the precomputed rcb x rcb.other interaction;
# verify where the column is created), controlling for settlements.
# Uses geddata rows with year < 2009 and explicit priors (scale 1, df 3).
onset.h2.full.hetcamp <- bayesglm(onset.n ~ rcb + 
                       rcb.other + 
                       rcb_rcb.other +
                       rsb + #control for settlements
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata[geddata$year < 2009,],
               prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.full.hetcamp)

# Settlement counterpart: same specification with rsb, rsb.other and
# rsb_rsb.other as the terms of interest and rcb as the control.
onset.h2.full.hetsett <- bayesglm(onset.n ~ rsb + 
                       rsb.other + 
                       rsb_rsb.other +
                       rcb + #control for camps
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata[geddata$year < 2009,],
               prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.full.hetsett)

## Placebo model
# Full-data placebo for camps: same specification as the treatment model, with
# rcb.placebo and rcb.placebo_rcb.other replacing rcb and rcb_rcb.other.
# Fitted on plagedrcb rows with year < 2009, with the same explicit priors.
onset.h2.full.hetcamp.pla <- bayesglm(onset.n ~ rcb.placebo + 
                       rcb.other + 
                       rcb.placebo_rcb.other +
                       rsb + #control for settlements
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrcb[plagedrcb$year < 2009,],
               prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(onset.h2.full.hetcamp.pla)

# Full-data placebo for settlements, fitted on plagedrsb rows with year < 2009.
onset.h2.full.hetsett.pla <- bayesglm(onset.n ~ rsb.placebo + 
                       rsb.other + 
                       rsb.placebo_rsb.other +
                       rcb + #control for camps
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrsb[plagedrsb$year < 2009,],
               prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(onset.h2.full.hetsett.pla)

## Treatment model 3, with subset data
# Same onset specification as the full-data camp model, fitted on geddatasub
# rows with year < 2009. The prior arguments are commented out here, so
# bayesglm falls back to its default prior settings.
onset.h2.sub.hetcamp <- bayesglm(onset.n ~ rcb + 
                       rcb.other + 
                       rcb_rcb.other +
                       rsb + #control for settlements
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub.hetcamp)

# Settlement counterpart on the subset data (default priors as well).
onset.h2.sub.hetsett <- bayesglm(onset.n ~ rsb + 
                       rsb.other + 
                       rsb_rsb.other +
                       rcb + #control for camps
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub.hetsett)

## Placebo model
# Subset-data placebo for camps: rcb.placebo and rcb.placebo_rcb.other replace
# the actual-presence terms. Fitted on plagedrcbsub rows with year < 2009;
# prior arguments are commented out, so bayesglm defaults apply.
onset.h2.sub.hetcamp.pla <- bayesglm(onset.n ~ rcb.placebo + 
                       rcb.other + 
                       rcb.placebo_rcb.other +
                       rsb + #control for settlements
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrcbsub[plagedrcbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub.hetcamp.pla)

# Subset-data placebo for settlements, fitted on plagedrsbsub rows with year < 2009.
onset.h2.sub.hetsett.pla <- bayesglm(onset.n ~ rsb.placebo + 
                       rsb.other + 
                       rsb.placebo_rsb.other +
                       rcb + #control for camps
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrsbsub[plagedrsbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.sub.hetsett.pla)

# Turn the fitted onset models into plot tables.
# Step 1: run pred.bi.int() (predicted probabilities for models with an
#         interaction term) on every model. The calls stay in their original
#         order in case the helper draws random numbers.
# Step 2: label each table as actual or placebo presence.
# Step 3: stack the actual and placebo tables per model family for ggplot.

## Full data
onset.h2.full.hetcamp.dplot     <- pred.bi.int(onset.h2.full.hetcamp)
onset.h2.full.hetsett.dplot     <- pred.bi.int(onset.h2.full.hetsett)
onset.h2.full.hetcamp.pla.dplot <- pred.bi.int(onset.h2.full.hetcamp.pla)
onset.h2.full.hetsett.pla.dplot <- pred.bi.int(onset.h2.full.hetsett.pla)

onset.h2.full.hetcamp.dplot$Group     <- "Actual presence"
onset.h2.full.hetsett.dplot$Group     <- "Actual presence"
onset.h2.full.hetcamp.pla.dplot$Group <- "Placebo presence"
onset.h2.full.hetsett.pla.dplot$Group <- "Placebo presence"

plot.onset.h2.full.hetcamp <- rbind(onset.h2.full.hetcamp.dplot, onset.h2.full.hetcamp.pla.dplot)
plot.onset.h2.full.hetsett <- rbind(onset.h2.full.hetsett.dplot, onset.h2.full.hetsett.pla.dplot)

## Subset data
onset.h2.sub.hetcamp.dplot     <- pred.bi.int(onset.h2.sub.hetcamp)
onset.h2.sub.hetsett.dplot     <- pred.bi.int(onset.h2.sub.hetsett)
onset.h2.sub.hetcamp.pla.dplot <- pred.bi.int(onset.h2.sub.hetcamp.pla)
onset.h2.sub.hetsett.pla.dplot <- pred.bi.int(onset.h2.sub.hetsett.pla)

onset.h2.sub.hetcamp.dplot$Group     <- "Actual presence"
onset.h2.sub.hetsett.dplot$Group     <- "Actual presence"
onset.h2.sub.hetcamp.pla.dplot$Group <- "Placebo presence"
onset.h2.sub.hetsett.pla.dplot$Group <- "Placebo presence"

plot.onset.h2.sub.hetcamp <- rbind(onset.h2.sub.hetcamp.dplot, onset.h2.sub.hetcamp.pla.dplot)
plot.onset.h2.sub.hetsett <- rbind(onset.h2.sub.hetsett.dplot, onset.h2.sub.hetsett.pla.dplot)

# save output as Rdata files
# save(onset.h2.full.hetcamp, file = "onset.h2.full.hetcamp.Rdata")
# save(onset.h2.full.hetcamp.pla, file = "onset.h2.full.hetcamp.pla.Rdata")
# save(plot.onset.h2.full.hetcamp, file = "plot.onset.h2.full.hetcamp.Rdata")
# save(onset.h2.full.hetsett, file = "onset.h2.full.hetsett.Rdata")
# save(onset.h2.full.hetsett.pla, file = "onset.h2.full.hetsett.pla.Rdata")
# save(plot.onset.h2.full.hetsett, file = "plot.onset.h2.full.hetsett.Rdata")
# 
# save(onset.h2.sub.hetcamp, file = "onset.h2.sub.hetcamp.Rdata")
# save(onset.h2.sub.hetcamp.pla, file = "onset.h2.sub.hetcamp.pla.Rdata")
# save(plot.onset.h2.sub.hetcamp, file = "plot.onset.h2.sub.hetcamp.Rdata")
# save(onset.h2.sub.hetsett, file = "onset.h2.sub.hetsett.Rdata")
# save(onset.h2.sub.hetsett.pla, file = "onset.h2.sub.hetsett.pla.Rdata")
# save(plot.onset.h2.sub.hetsett, file = "plot.onset.h2.sub.hetsett.Rdata")

@

<<HetIncidence_CampSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# If perfect separation, because too many countries with no conflict in the full dataset, uninformative priors are chosen based on the stan-dev Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

## Treatment model 1, with full data
# Logit model (bayesglm) of conflict incidence on camp presence (rcb),
# rcb.other and rcb_rcb.other (presumably the precomputed interaction column;
# verify where it is created), controlling for settlements.
# Uses geddata rows with year < 2009 and explicit priors (scale 1, df 3).
incidence.h2.full.hetcamp <- bayesglm(incidence ~ rcb + 
                       rcb.other + 
                       rcb_rcb.other +
                       rsb + #control for settlements
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata[geddata$year < 2009,],
               prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.full.hetcamp)

# Settlement counterpart: rsb, rsb.other and rsb_rsb.other are the terms of
# interest and rcb is the control.
incidence.h2.full.hetsett <- bayesglm(incidence ~ rsb + 
                       rsb.other + 
                       rsb_rsb.other +
                       rcb + #control for camps
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata[geddata$year < 2009,],
               prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.full.hetsett)

## Placebo model
# Full-data placebo for camps: rcb.placebo and rcb.placebo_rcb.other replace
# the actual-presence terms. Fitted on plagedrcb rows with year < 2009, with
# the same explicit priors as the treatment model.
incidence.h2.full.hetcamp.pla <- bayesglm(incidence ~ rcb.placebo + 
                       rcb.other + 
                       rcb.placebo_rcb.other +
                       rsb + #control for settlements
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrcb[plagedrcb$year < 2009,],
               prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(incidence.h2.full.hetcamp.pla)

# Full-data placebo for settlements, fitted on plagedrsb rows with year < 2009.
incidence.h2.full.hetsett.pla <- bayesglm(incidence ~ rsb.placebo + 
                       rsb.other + 
                       rsb.placebo_rsb.other +
                       rcb + #control for camps
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrsb[plagedrsb$year < 2009,],
               prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(incidence.h2.full.hetsett.pla)

## Treatment model 3, with subset data
# Same incidence specification as the full-data camp model, fitted on
# geddatasub rows with year < 2009. The prior arguments are commented out, so
# bayesglm falls back to its default prior settings.
incidence.h2.sub.hetcamp <- bayesglm(incidence ~ rcb + 
                       rcb.other + 
                       rcb_rcb.other +
                       rsb + #control for settlements
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub.hetcamp)

# Settlement counterpart on the subset data (default priors as well).
incidence.h2.sub.hetsett <- bayesglm(incidence ~ rsb + 
                       rsb.other + 
                       rsb_rsb.other +
                       rcb + #control for camps
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub.hetsett)

## Placebo model
# Subset-data placebo for camps: rcb.placebo and rcb.placebo_rcb.other replace
# the actual-presence terms. Fitted on plagedrcbsub rows with year < 2009;
# prior arguments are commented out, so bayesglm defaults apply.
incidence.h2.sub.hetcamp.pla <- bayesglm(incidence ~ rcb.placebo + 
                       rcb.other + 
                       rcb.placebo_rcb.other +
                       rsb + #control for settlements
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrcbsub[plagedrcbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub.hetcamp.pla)

# Subset-data placebo for settlements, fitted on plagedrsbsub rows with year < 2009.
incidence.h2.sub.hetsett.pla <- bayesglm(incidence ~ rsb.placebo + 
                       rsb.other + 
                       rsb.placebo_rsb.other +
                       rcb + #control for camps
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrsbsub[plagedrsbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub.hetsett.pla)

# Turn the fitted incidence models into plot tables.
# Step 1: run pred.bi.int() (predicted probabilities for models with an
#         interaction term) on every model. The calls stay in their original
#         order in case the helper draws random numbers.
# Step 2: label each table as actual or placebo presence.
# Step 3: stack the actual and placebo tables per model family for ggplot.

## Full data
incidence.h2.full.hetcamp.dplot     <- pred.bi.int(incidence.h2.full.hetcamp)
incidence.h2.full.hetsett.dplot     <- pred.bi.int(incidence.h2.full.hetsett)
incidence.h2.full.hetcamp.pla.dplot <- pred.bi.int(incidence.h2.full.hetcamp.pla)
incidence.h2.full.hetsett.pla.dplot <- pred.bi.int(incidence.h2.full.hetsett.pla)

incidence.h2.full.hetcamp.dplot$Group     <- "Actual presence"
incidence.h2.full.hetsett.dplot$Group     <- "Actual presence"
incidence.h2.full.hetcamp.pla.dplot$Group <- "Placebo presence"
incidence.h2.full.hetsett.pla.dplot$Group <- "Placebo presence"

plot.incidence.h2.full.hetcamp <- rbind(incidence.h2.full.hetcamp.dplot, incidence.h2.full.hetcamp.pla.dplot)
plot.incidence.h2.full.hetsett <- rbind(incidence.h2.full.hetsett.dplot, incidence.h2.full.hetsett.pla.dplot)

## Subset data
incidence.h2.sub.hetcamp.dplot     <- pred.bi.int(incidence.h2.sub.hetcamp)
incidence.h2.sub.hetsett.dplot     <- pred.bi.int(incidence.h2.sub.hetsett)
incidence.h2.sub.hetcamp.pla.dplot <- pred.bi.int(incidence.h2.sub.hetcamp.pla)
incidence.h2.sub.hetsett.pla.dplot <- pred.bi.int(incidence.h2.sub.hetsett.pla)

incidence.h2.sub.hetcamp.dplot$Group     <- "Actual presence"
incidence.h2.sub.hetsett.dplot$Group     <- "Actual presence"
incidence.h2.sub.hetcamp.pla.dplot$Group <- "Placebo presence"
incidence.h2.sub.hetsett.pla.dplot$Group <- "Placebo presence"

plot.incidence.h2.sub.hetcamp <- rbind(incidence.h2.sub.hetcamp.dplot, incidence.h2.sub.hetcamp.pla.dplot)
plot.incidence.h2.sub.hetsett <- rbind(incidence.h2.sub.hetsett.dplot, incidence.h2.sub.hetsett.pla.dplot)

# save output as Rdata files
# save(incidence.h2.full.hetcamp, file = "incidence.h2.full.hetcamp.Rdata")
# save(incidence.h2.full.hetcamp.pla, file = "incidence.h2.full.hetcamp.pla.Rdata")
# save(plot.incidence.h2.full.hetcamp, file = "plot.incidence.h2.full.hetcamp.Rdata")
# save(incidence.h2.full.hetsett, file = "incidence.h2.full.hetsett.Rdata")
# save(incidence.h2.full.hetsett.pla, file = "incidence.h2.full.hetsett.pla.Rdata")
# save(plot.incidence.h2.full.hetsett, file = "plot.incidence.h2.full.hetsett.Rdata")
# 
# save(incidence.h2.sub.hetcamp, file = "incidence.h2.sub.hetcamp.Rdata")
# save(incidence.h2.sub.hetcamp.pla, file = "incidence.h2.sub.hetcamp.pla.Rdata")
# save(plot.incidence.h2.sub.hetcamp, file = "plot.incidence.h2.sub.hetcamp.Rdata")
# save(incidence.h2.sub.hetsett, file = "incidence.h2.sub.hetsett.Rdata")
# save(incidence.h2.sub.hetsett.pla, file = "incidence.h2.sub.hetsett.pla.Rdata")
# save(plot.incidence.h2.sub.hetsett, file = "plot.incidence.h2.sub.hetsett.Rdata")

@

<<HetAttack_CampSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 2, with full data
# lm_robust regression of the attack count on camp presence (rcb), rcb.other
# and rcb_rcb.other (presumably the precomputed interaction column; verify),
# controlling for settlements (rsb), the lagged outcome, province covariates,
# and country and year dummies. Unlike the onset/incidence models, the full
# geddata is used without a year filter.
attack.h2.full.hetcamp <- lm_robust(attack ~ rcb + 
                       rcb.other + 
                       rcb_rcb.other +
                       rsb +
                       attack_1 +
                       attack_neighbors_sum + 
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist + 
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata)

#summary(attack.h2.full.hetcamp)

# Settlement counterpart: rsb, rsb.other and rsb_rsb.other are the terms of
# interest and rcb is the control.
attack.h2.full.hetsett <- lm_robust(attack ~ rsb + 
                       rsb.other + 
                       rsb_rsb.other +
                       rcb +
                       attack_1 +
                       attack_neighbors_sum + #using mean is too dependent on number of neighboring provinces
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist + 
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata)

#summary(attack.h2.full.hetsett)


## Placebo model
attack.h2.full.hetcamp.pla <- lm_robust(attack ~ rcb.placebo + 
                       rcb.other + 
                       rcb.placebo_rcb.other +
                       rsb +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrcb)

#summary(attack.h2.full.hetcamp.pla)

# Placebo fit for `attack`: rsb.placebo and rsb.placebo_rsb.other are the leading
# terms (the latter presumably their precomputed product with rsb.other -- confirm),
# fitted to plagedrsb. excluded_mean stays out of the formula.
attack.h2.full.hetsett.pla <- lm_robust(
  attack ~ rsb.placebo + rsb.other + rsb.placebo_rsb.other + rcb +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrsb
)

#summary(attack.h2.full.hetsett.pla)


## Treatment model 3, with subset data
# lm_robust fit of `attack` on rcb, rcb.other and rcb_rcb.other, using the
# geddatasub data. rcb_rcb.other is presumably the precomputed rcb x rcb.other
# product -- confirm. Country and year enter as factor dummies.
# attack_neighbors_sum is a sum because using the mean is too dependent on the
# number of neighboring provinces. excluded_mean stays out of the formula.
attack.h2.sub.hetcamp <- lm_robust(
  attack ~ rcb + rcb.other + rcb_rcb.other + rsb +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(attack.h2.sub.hetcamp)

# lm_robust fit of `attack` on rsb, rsb.other and rsb_rsb.other, using the
# geddatasub data. rsb_rsb.other is presumably the precomputed rsb x rsb.other
# product -- confirm. Country and year enter as factor dummies.
# attack_neighbors_sum is a sum because using the mean is too dependent on the
# number of neighboring provinces. excluded_mean stays out of the formula.
attack.h2.sub.hetsett <- lm_robust(
  attack ~ rsb + rsb.other + rsb_rsb.other + rcb +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(attack.h2.sub.hetsett)


## Placebo model
# Placebo fit for `attack` on the subset: rcb.placebo and rcb.placebo_rcb.other
# are the leading terms (the latter presumably their precomputed product with
# rcb.other -- confirm), fitted to plagedrcbsub. excluded_mean stays out.
attack.h2.sub.hetcamp.pla <- lm_robust(
  attack ~ rcb.placebo + rcb.other + rcb.placebo_rcb.other + rsb +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrcbsub
)

#summary(attack.h2.sub.hetcamp.pla)

# Placebo fit for `attack` on the subset: rsb.placebo and rsb.placebo_rsb.other
# are the leading terms (the latter presumably their precomputed product with
# rsb.other -- confirm), fitted to plagedrsbsub. excluded_mean stays out.
attack.h2.sub.hetsett.pla <- lm_robust(
  attack ~ rsb.placebo + rsb.other + rsb.placebo_rsb.other + rcb +
    attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrsbsub
)

#summary(attack.h2.sub.hetsett.pla)


# Build the plotting tables for the full-data `attack` models.
# NOTE: keep this call order. pred.lm.int.r() is defined elsewhere and may depend
# on the RNG state -- confirm before reordering.
attack.h2.full.hetcamp.dplot <- pred.lm.int.r(attack.h2.full.hetcamp)
attack.h2.full.hetsett.dplot <- pred.lm.int.r(attack.h2.full.hetsett)
attack.h2.full.hetcamp.pla.dplot <- pred.lm.int.r(attack.h2.full.hetcamp.pla)
attack.h2.full.hetsett.pla.dplot <- pred.lm.int.r(attack.h2.full.hetsett.pla)

# Label the type of estimate.
attack.h2.full.hetcamp.dplot$Group <- "Actual presence"
attack.h2.full.hetsett.dplot$Group <- "Actual presence"
attack.h2.full.hetcamp.pla.dplot$Group <- "Placebo presence"
attack.h2.full.hetsett.pla.dplot$Group <- "Placebo presence"

# Stack the actual rows above the placebo rows for ggplot.
plot.attack.h2.full.hetcamp <- rbind(attack.h2.full.hetcamp.dplot, attack.h2.full.hetcamp.pla.dplot)
plot.attack.h2.full.hetsett <- rbind(attack.h2.full.hetsett.dplot, attack.h2.full.hetsett.pla.dplot)

# Build the plotting tables for the subset-data `attack` models.
# NOTE: keep this call order. pred.lm.int.r() is defined elsewhere and may depend
# on the RNG state -- confirm before reordering.
attack.h2.sub.hetcamp.dplot <- pred.lm.int.r(attack.h2.sub.hetcamp)
attack.h2.sub.hetsett.dplot <- pred.lm.int.r(attack.h2.sub.hetsett)
attack.h2.sub.hetcamp.pla.dplot <- pred.lm.int.r(attack.h2.sub.hetcamp.pla)
attack.h2.sub.hetsett.pla.dplot <- pred.lm.int.r(attack.h2.sub.hetsett.pla)

# Label the type of estimate.
attack.h2.sub.hetcamp.dplot$Group <- "Actual presence"
attack.h2.sub.hetsett.dplot$Group <- "Actual presence"
attack.h2.sub.hetcamp.pla.dplot$Group <- "Placebo presence"
attack.h2.sub.hetsett.pla.dplot$Group <- "Placebo presence"

# Stack the actual rows above the placebo rows for ggplot.
plot.attack.h2.sub.hetcamp <- rbind(attack.h2.sub.hetcamp.dplot, attack.h2.sub.hetcamp.pla.dplot)
plot.attack.h2.sub.hetsett <- rbind(attack.h2.sub.hetsett.dplot, attack.h2.sub.hetsett.pla.dplot)

@

<<HetBattleDeath_CampSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 2, with full data
# lm_robust fit of log(best + 1) on rcb, rcb.other and rcb_rcb.other, using geddata.
# rcb_rcb.other is presumably the precomputed rcb x rcb.other product -- confirm.
# best_1 and best_neighbors_sum get the same log(x + 1) transform as the outcome.
# Country and year enter as factor dummies. excluded_mean stays out of the formula.
best.h2.full.hetcamp <- lm_robust(
  log(best + 1) ~ rcb + rcb.other + rcb_rcb.other + rsb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

#summary(best.h2.full.hetcamp)

# lm_robust fit of log(best + 1) on rsb, rsb.other and rsb_rsb.other, using geddata.
# rsb_rsb.other is presumably the precomputed rsb x rsb.other product -- confirm.
# best_1 and best_neighbors_sum get the same log(x + 1) transform as the outcome.
# Country and year enter as factor dummies. excluded_mean stays out of the formula.
best.h2.full.hetsett <- lm_robust(
  log(best + 1) ~ rsb + rsb.other + rsb_rsb.other + rcb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

#summary(best.h2.full.hetsett)


## Placebo model
# Placebo fit for log(best + 1): rcb.placebo and rcb.placebo_rcb.other are the
# leading terms (the latter presumably their precomputed product with rcb.other
# -- confirm), fitted to plagedrcb. excluded_mean stays out of the formula.
best.h2.full.hetcamp.pla <- lm_robust(
  log(best + 1) ~ rcb.placebo + rcb.other + rcb.placebo_rcb.other + rsb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrcb
)

#summary(best.h2.full.hetcamp.pla)

# Placebo fit for log(best + 1): rsb.placebo and rsb.placebo_rsb.other are the
# leading terms (the latter presumably their precomputed product with rsb.other
# -- confirm), fitted to plagedrsb. excluded_mean stays out of the formula.
best.h2.full.hetsett.pla <- lm_robust(
  log(best + 1) ~ rsb.placebo + rsb.other + rsb.placebo_rsb.other + rcb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrsb
)

#summary(best.h2.full.hetsett.pla)


## Treatment model 3, with subset data
# lm_robust fit of log(best + 1) on rcb, rcb.other and rcb_rcb.other, using the
# geddatasub data. rcb_rcb.other is presumably the precomputed rcb x rcb.other
# product -- confirm. best_1 and best_neighbors_sum get the same log(x + 1)
# transform as the outcome. excluded_mean stays out of the formula.
best.h2.sub.hetcamp <- lm_robust(
  log(best + 1) ~ rcb + rcb.other + rcb_rcb.other + rsb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(best.h2.sub.hetcamp)

# lm_robust fit of log(best + 1) on rsb, rsb.other and rsb_rsb.other, using the
# geddatasub data. rsb_rsb.other is presumably the precomputed rsb x rsb.other
# product -- confirm. best_1 and best_neighbors_sum get the same log(x + 1)
# transform as the outcome. excluded_mean stays out of the formula.
best.h2.sub.hetsett <- lm_robust(
  log(best + 1) ~ rsb + rsb.other + rsb_rsb.other + rcb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(best.h2.sub.hetsett)


## Placebo model
# Placebo fit for log(best + 1) on the subset: rcb.placebo and
# rcb.placebo_rcb.other are the leading terms (the latter presumably their
# precomputed product with rcb.other -- confirm), fitted to plagedrcbsub.
# excluded_mean stays out of the formula.
best.h2.sub.hetcamp.pla <- lm_robust(
  log(best + 1) ~ rcb.placebo + rcb.other + rcb.placebo_rcb.other + rsb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrcbsub
)

#summary(best.h2.sub.hetcamp.pla)

# Placebo fit for log(best + 1) on the subset: rsb.placebo and
# rsb.placebo_rsb.other are the leading terms (the latter presumably their
# precomputed product with rsb.other -- confirm), fitted to plagedrsbsub.
# excluded_mean stays out of the formula.
best.h2.sub.hetsett.pla <- lm_robust(
  log(best + 1) ~ rsb.placebo + rsb.other + rsb.placebo_rsb.other + rcb +
    log(best_1 + 1) + log(best_neighbors_sum + 1) +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrsbsub
)

#summary(best.h2.sub.hetsett.pla)


# Build the plotting tables for the full-data log(best + 1) models.
# NOTE: keep this call order. pred.lm.int.r() is defined elsewhere and may depend
# on the RNG state -- confirm before reordering.
best.h2.full.hetcamp.dplot <- pred.lm.int.r(best.h2.full.hetcamp)
best.h2.full.hetsett.dplot <- pred.lm.int.r(best.h2.full.hetsett)
best.h2.full.hetcamp.pla.dplot <- pred.lm.int.r(best.h2.full.hetcamp.pla)
best.h2.full.hetsett.pla.dplot <- pred.lm.int.r(best.h2.full.hetsett.pla)

# Label the type of estimate.
best.h2.full.hetcamp.dplot$Group <- "Actual presence"
best.h2.full.hetsett.dplot$Group <- "Actual presence"
best.h2.full.hetcamp.pla.dplot$Group <- "Placebo presence"
best.h2.full.hetsett.pla.dplot$Group <- "Placebo presence"

# Stack the actual rows above the placebo rows for ggplot.
plot.best.h2.full.hetcamp <- rbind(best.h2.full.hetcamp.dplot, best.h2.full.hetcamp.pla.dplot)
plot.best.h2.full.hetsett <- rbind(best.h2.full.hetsett.dplot, best.h2.full.hetsett.pla.dplot)

# Build the plotting tables for the subset-data log(best + 1) models.
# NOTE: keep this call order. pred.lm.int.r() is defined elsewhere and may depend
# on the RNG state -- confirm before reordering.
best.h2.sub.hetcamp.dplot <- pred.lm.int.r(best.h2.sub.hetcamp)
best.h2.sub.hetsett.dplot <- pred.lm.int.r(best.h2.sub.hetsett)
best.h2.sub.hetcamp.pla.dplot <- pred.lm.int.r(best.h2.sub.hetcamp.pla)
best.h2.sub.hetsett.pla.dplot <- pred.lm.int.r(best.h2.sub.hetsett.pla)

# Label the type of estimate.
best.h2.sub.hetcamp.dplot$Group <- "Actual presence"
best.h2.sub.hetsett.dplot$Group <- "Actual presence"
best.h2.sub.hetcamp.pla.dplot$Group <- "Placebo presence"
best.h2.sub.hetsett.pla.dplot$Group <- "Placebo presence"

# Stack the actual rows above the placebo rows for ggplot.
plot.best.h2.sub.hetcamp <- rbind(best.h2.sub.hetcamp.dplot, best.h2.sub.hetcamp.pla.dplot)
plot.best.h2.sub.hetsett <- rbind(best.h2.sub.hetsett.dplot, best.h2.sub.hetsett.pla.dplot)

@

<<Het_CampsSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 12, out.width= ".95\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee camps vs. settlements (heterogeneous effects analysis) on conflict outcomes (black) compared to their respective placebo estimates, i.e. effect of future refugee camps vs. settlements (gray), conditional on refugee camps vs. settlements in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h2.sub.hetcamp.Rdata")
# load("plot.onset.h2.sub.hetsett.Rdata")
# load("plot.incidence.h2.sub.hetcamp.Rdata")
# load("plot.incidence.h2.sub.hetsett.Rdata")

# Tag every subset plotting table with the outcome it belongs to; the plots
# below map this `outcome` column to the x axis.
plot.onset.h2.sub.hetsett$outcome <- "onset"
plot.onset.h2.sub.hetcamp$outcome <- "onset"
plot.incidence.h2.sub.hetsett$outcome <- "incidence"
plot.incidence.h2.sub.hetcamp$outcome <- "incidence"
plot.attack.h2.sub.hetsett$outcome <- "attack"
plot.attack.h2.sub.hetcamp$outcome <- "attack"
plot.best.h2.sub.hetsett$outcome <- "log(best + 1)"
plot.best.h2.sub.hetcamp$outcome <- "log(best + 1)"

# Shared dodge (width 0.5) so the two Group estimates sit side by side
# instead of overlapping.
pd <- position_dodge(0.5)

# CAMPS: h2a is concentrated, h2b is dispersed
# Concentrated camps (H2a): onset and incidence estimates, one point per Group.
# Keeps the rows with Plot == 3 from each outcome table and stacks them.
# This panel is the only one in the camp set that draws a legend (inside, top right).
PLOT.onsetincidence.h2a.sub.hetcamp <- bind_rows(plot.onset.h2.sub.hetcamp[plot.onset.h2.sub.hetcamp$Plot == 3,],
          plot.incidence.h2.sub.hetcamp[plot.incidence.h2.sub.hetcamp$Plot ==3,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>% # onset first on the x axis
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  # NOTE(review): scale limits censor values outside [-.12, .12] instead of
  # clipping them -- confirm no interval crosses the limits.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = .8),
        legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white")
        )

# Dispersed camps (H2b): onset and incidence estimates, one point per Group.
# Keeps the rows with Plot == 6 from each outcome table and stacks them.
PLOT.onsetincidence.h2b.sub.hetcamp <- bind_rows(plot.onset.h2.sub.hetcamp[plot.onset.h2.sub.hetcamp$Plot == 6,],
          plot.incidence.h2.sub.hetcamp[plot.incidence.h2.sub.hetcamp$Plot ==6,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>% # onset first on the x axis
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  # NOTE(review): scale limits censor values outside [-.12, .12] instead of
  # clipping them -- confirm no interval crosses the limits.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Concentrated camps (H2a): `attack` estimates, one point per Group.
# Uses rows 1 and 3 of the stacked table; presumably the first row of the
# actual block and the first row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.attack.h2a.sub.hetcamp <- ggplot(plot.attack.h2.sub.hetcamp[c(1,3),],
                                      aes(x = outcome, y = Means,
                                          group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-5, 5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-5,5) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels = c("Violent Events")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Dispersed camps (H2b): `attack` estimates, one point per Group.
# Uses rows 2 and 4 of the stacked table; presumably the second row of the
# actual block and the second row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.attack.h2b.sub.hetcamp <- ggplot(plot.attack.h2.sub.hetcamp[c(2,4),],
                                      aes(x = outcome, y = Means,
                                          group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-5, 5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-5,5) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels = c("Violent Events")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Concentrated camps (H2a): log(best + 1) estimates, one point per Group.
# Uses rows 1 and 3 of the stacked table; presumably the first row of the
# actual block and the first row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.best.h2a.sub.hetcamp <- ggplot(plot.best.h2.sub.hetcamp[c(1,3),],
                                    aes(x = outcome, y = Means,
                                        group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-.5, .5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels = c("Battle Deaths")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Dispersed camps (H2b): log(best + 1) estimates, one point per Group.
# Uses rows 2 and 4 of the stacked table; presumably the second row of the
# actual block and the second row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.best.h2b.sub.hetcamp <- ggplot(plot.best.h2.sub.hetcamp[c(2,4),],
                                    aes(x = outcome, y = Means,
                                        group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-.5, .5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels = c("Battle Deaths")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )


# SETTLEMENTS: h2a is concentrated, h2b is dispersed
# Concentrated settlements (H2a): onset and incidence estimates, one point per Group.
# Keeps the rows with Plot == 3 from each outcome table and stacks them.
PLOT.onsetincidence.h2a.sub.hetsett <- bind_rows(plot.onset.h2.sub.hetsett[plot.onset.h2.sub.hetsett$Plot == 3,],
          plot.incidence.h2.sub.hetsett[plot.incidence.h2.sub.hetsett$Plot ==3,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>% # onset first on the x axis
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  # NOTE(review): scale limits censor values outside [-.12, .12] instead of
  # clipping them -- confirm no interval crosses the limits.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Dispersed settlements (H2b): onset and incidence estimates, one point per Group.
# Keeps the rows with Plot == 6 from each outcome table and stacks them.
PLOT.onsetincidence.h2b.sub.hetsett <- bind_rows(plot.onset.h2.sub.hetsett[plot.onset.h2.sub.hetsett$Plot == 6,],
          plot.incidence.h2.sub.hetsett[plot.incidence.h2.sub.hetsett$Plot ==6,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>% # onset first on the x axis
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  # NOTE(review): scale limits censor values outside [-.12, .12] instead of
  # clipping them -- confirm no interval crosses the limits.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Concentrated settlements (H2a): `attack` estimates, one point per Group.
# Uses rows 1 and 3 of the stacked table; presumably the first row of the
# actual block and the first row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.attack.h2a.sub.hetsett <- ggplot(plot.attack.h2.sub.hetsett[c(1,3),],
                                      aes(x = outcome, y = Means,
                                          group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-5, 5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-5,5) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels = c("Violent Events")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Dispersed settlements (H2b): `attack` estimates, one point per Group.
# Uses rows 2 and 4 of the stacked table; presumably the second row of the
# actual block and the second row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.attack.h2b.sub.hetsett <- ggplot(plot.attack.h2.sub.hetsett[c(2,4),],
                                      aes(x = outcome, y = Means,
                                          group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-5, 5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-5,5) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels = c("Violent Events")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Concentrated settlements (H2a): log(best + 1) estimates, one point per Group.
# Uses rows 1 and 3 of the stacked table; presumably the first row of the
# actual block and the first row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.best.h2a.sub.hetsett <- ggplot(plot.best.h2.sub.hetsett[c(1,3),],
                                    aes(x = outcome, y = Means,
                                        group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-.5, .5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels = c("Battle Deaths")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Dispersed settlements (H2b): log(best + 1) estimates, one point per Group.
# Uses rows 2 and 4 of the stacked table; presumably the second row of the
# actual block and the second row of the placebo block -- confirm against
# the pred.lm.int.r() output.
PLOT.best.h2b.sub.hetsett <- ggplot(plot.best.h2.sub.hetsett[c(2,4),],
                                    aes(x = outcome, y = Means,
                                        group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  # bounds pass through as.character() so factor-coded values convert correctly
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  # NOTE(review): ylim() censors values outside [-.5, .5] instead of clipping
  # them -- confirm no interval crosses the limits.
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels = c("Battle Deaths")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = "none") + # guides() takes "none"; the logical FALSE form is deprecated
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill = NA, size = .8)
        )

# Assemble the figure. Each row is the titled onset/incidence panel followed by
# the attack and battle-death panels; `+` and `/` on plot objects are presumably
# the patchwork operators (side by side / stacked) -- the package load is not
# visible in this section.
# NOTE(review): the plot.title hjust values lie far outside [0, 1] and differ
# per row (-3, 2.5, -1, -5.5); they look hand-tuned to position the row titles
# -- confirm against the rendered figure.
patch1.hetcamp <- (
  PLOT.onsetincidence.h2a.sub.hetcamp +
    ggtitle("Effect of Concentrated Refugee Camps (H2a)") +
    theme(plot.title = element_text(hjust = -3))
) + (PLOT.attack.h2a.sub.hetcamp + PLOT.best.h2a.sub.hetcamp)

patch1.hetsett <- (
  PLOT.onsetincidence.h2a.sub.hetsett +
    ggtitle("Effect of Concentrated Refugee Settlements (H2a)") +
    theme(plot.title = element_text(hjust = 2.5))
) + (PLOT.attack.h2a.sub.hetsett + PLOT.best.h2a.sub.hetsett)

patch2.hetcamp <- (
  PLOT.onsetincidence.h2b.sub.hetcamp +
    ggtitle("Effect of Dispersed Refugee Camps (H2b)") +
    theme(plot.title = element_text(hjust = -1))
) + (PLOT.attack.h2b.sub.hetcamp + PLOT.best.h2b.sub.hetcamp)

patch2.hetsett <- (
  PLOT.onsetincidence.h2b.sub.hetsett +
    ggtitle("Effect of Dispersed Refugee Settlements (H2b)") +
    theme(plot.title = element_text(hjust = -5.5))
) + (PLOT.attack.h2b.sub.hetsett + PLOT.best.h2b.sub.hetsett)

# Rows top to bottom: concentrated camps, concentrated settlements,
# dispersed camps, dispersed settlements. Left unassigned so the chunk prints it.
patch1.hetcamp / patch1.hetsett / patch2.hetcamp / patch2.hetsett

@


Figure~\ref{fig:Het_CampSett_H3} shows the heterogeneous effects on nighttime lights of the presence of refugee camps versus settlements, respectively, conditional on refugee camps or settlements in other provinces of the same country-year. The top two plots compare the presence of camps and settlements in geographically concentrated cases, while the bottom two plots compare the presence of camps and settlements in geographically dispersed cases. Although the estimates are not statistically significant, there appear to be positive effects of refugee camps and settlements in geographically concentrated cases; the positive effect for camps approaches statistical significance. This appears supportive of our argument and other findings on development.

% H3: Effect of Concentrated/Dispersed Refugee Camps vs. Settlements on Nightlights 
<<HetNightLights_CampSettH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3, with subset data
# lm_robust fit of nlights_calib_mean on rcb, rcb.other and rcb_rcb.other, using
# the geddatasub data. rcb_rcb.other is presumably the precomputed
# rcb x rcb.other product -- confirm. Country and year enter as factor dummies.
# excluded_mean stays out of the formula.
nlights_calib_mean.h2.sub.hetcamp <- lm_robust(
  nlights_calib_mean ~ rcb + rcb.other + rcb_rcb.other + rsb +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(nlights_calib_mean.h2.sub.hetcamp)

# lm_robust fit of nlights_calib_mean on rsb, rsb.other and rsb_rsb.other, using
# the geddatasub data. rsb_rsb.other is presumably the precomputed
# rsb x rsb.other product -- confirm. Country and year enter as factor dummies.
# excluded_mean stays out of the formula.
nlights_calib_mean.h2.sub.hetsett <- lm_robust(
  nlights_calib_mean ~ rsb + rsb.other + rsb_rsb.other + rcb +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(nlights_calib_mean.h2.sub.hetsett)


## Placebo model
# Placebo fit for nlights_calib_mean: rcb.placebo and rcb.placebo_rcb.other are
# the leading terms (the latter presumably their precomputed product with
# rcb.other -- confirm), fitted to plagedrcbsub. excluded_mean stays out.
nlights_calib_mean.h2.sub.hetcamp.pla <- lm_robust(
  nlights_calib_mean ~ rcb.placebo + rcb.other + rcb.placebo_rcb.other + rsb +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrcbsub
)

#summary(nlights_calib_mean.h2.sub.hetcamp.pla)

# Placebo fit for nlights_calib_mean: rsb.placebo and rsb.placebo_rsb.other are
# the leading terms (the latter presumably their precomputed product with
# rsb.other -- confirm), fitted to plagedrsbsub. excluded_mean stays out.
nlights_calib_mean.h2.sub.hetsett.pla <- lm_robust(
  nlights_calib_mean ~ rsb.placebo + rsb.other + rsb.placebo_rsb.other + rcb +
    nlights_calib_mean_1 + nlights_mean_neighbor + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 + STD + SQKM_ADMIN +
    log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrsbsub
)

#summary(nlights_calib_mean.h2.sub.hetsett.pla)


# Build the plotting tables for the subset-data nighttime-lights models.
# NOTE: keep this call order. pred.lm.int.r() is defined elsewhere and may depend
# on the RNG state -- confirm before reordering.
nlights_calib_mean.h2.sub.hetcamp.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.hetcamp)
nlights_calib_mean.h2.sub.hetsett.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.hetsett)
nlights_calib_mean.h2.sub.hetcamp.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.hetcamp.pla)
nlights_calib_mean.h2.sub.hetsett.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.hetsett.pla)

# Label the type of estimate.
nlights_calib_mean.h2.sub.hetcamp.dplot$Group <- "Actual presence"
nlights_calib_mean.h2.sub.hetsett.dplot$Group <- "Actual presence"
nlights_calib_mean.h2.sub.hetcamp.pla.dplot$Group <- "Placebo presence"
nlights_calib_mean.h2.sub.hetsett.pla.dplot$Group <- "Placebo presence"

# Stack the actual rows above the placebo rows for ggplot.
plot.nlights_calib_mean.h2.sub.hetcamp <- rbind(nlights_calib_mean.h2.sub.hetcamp.dplot,
                                                nlights_calib_mean.h2.sub.hetcamp.pla.dplot)
plot.nlights_calib_mean.h2.sub.hetsett <- rbind(nlights_calib_mean.h2.sub.hetsett.dplot,
                                                nlights_calib_mean.h2.sub.hetsett.pla.dplot)

@

<<Het_CampSett_H3, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee camps vs. settlements (heterogeneous effects analysis) on average nighttime lights (black), compared to their respective placebo estimates, i.e. effect of future refugee presence (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=
 
# Camps-vs-settlements figure: four point-and-CI panels (concentrated/dispersed x
# camps/settlements) assembled into a 2 x 2 patchwork.
#
# NOTE: `pd` (the position adjustment shared by points and error bars) is not
# defined in this chunk; it must already exist from an earlier chunk.

# Build one panel from a table of estimates.
#   est         rows to plot; needs the columns outcome, Means, CIL, CIU and Group
#   show_legend TRUE draws the legend inside the top-right corner of the panel,
#               FALSE (default) suppresses it
# Returns a ggplot object.
hetcampsett_lights_panel <- function(est, show_legend = FALSE) {
  legend_theme <- if (show_legend) {
    theme(legend.title = element_blank(),
          legend.justification = c(1, 1),
          legend.position = c(.99, .99),
          legend.box = "horizontal",
          legend.direction = "vertical",
          legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    theme(legend.position = "none")
  }

  ggplot(est, aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU are passed through as.character() before as.numeric(), as in the
    # other figure chunks of this file
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(-.1, .3) +
    ylab("Change in Predicted Lights") +
    xlab("") +
    scale_x_discrete(labels = c("Average Nighttime Lights")) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated guides(shape = FALSE)
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)) +
    legend_theme
}

# Add a left-aligned, 11pt title to a panel.
hetcampsett_titled <- function(panel, title) {
  panel +
    ggtitle(title) +
    theme(plot.title = element_text(hjust = 0, size = 11))
}

# Row selection relies on the rbind order of the estimate tables (actual rows
# first, placebo rows second): rows 1 and 3 are used for the concentrated panels,
# rows 2 and 4 for the dispersed panels.
# NOTE(review): this assumes pred.lm.int.r returns exactly two rows per model -- confirm.

# CAMPS: h2a is concentrated, h2b is dispersed (only the first panel carries the legend)
PLOT.nlights_calib_mean.h2a.sub.hetcamp <-
  hetcampsett_lights_panel(plot.nlights_calib_mean.h2.sub.hetcamp[c(1, 3), ],
                           show_legend = TRUE)
PLOT.nlights_calib_mean.h2b.sub.hetcamp <-
  hetcampsett_lights_panel(plot.nlights_calib_mean.h2.sub.hetcamp[c(2, 4), ])

# SETTLEMENTS: h2a is concentrated, h2b is dispersed
PLOT.nlights_calib_mean.h2a.sub.hetsett <-
  hetcampsett_lights_panel(plot.nlights_calib_mean.h2.sub.hetsett[c(1, 3), ])
PLOT.nlights_calib_mean.h2b.sub.hetsett <-
  hetcampsett_lights_panel(plot.nlights_calib_mean.h2.sub.hetsett[c(2, 4), ])

# Titled panels: top row = concentrated (H3a), bottom row = dispersed (H3b)
patch1.hettcamp <- hetcampsett_titled(PLOT.nlights_calib_mean.h2a.sub.hetcamp,
                                      "Effect of Concentrated Refugee Camps (H3a)")

patch1.hettsett <- hetcampsett_titled(PLOT.nlights_calib_mean.h2a.sub.hetsett,
                                      "Effect of Concentrated Refugee Settlements (H3a)")

patch2.hettcamp <- hetcampsett_titled(PLOT.nlights_calib_mean.h2b.sub.hetcamp,
                                      "Effect of Dispersed Refugee Camps (H3b)")

patch2.hettsett <- hetcampsett_titled(PLOT.nlights_calib_mean.h2b.sub.hetsett,
                                      "Effect of Dispersed Refugee Settlements (H3b)")

# patchwork layout: `+` places panels side by side, `/` stacks the two rows
(patch1.hettcamp + patch1.hettsett) /
  (patch2.hettcamp + patch2.hettsett)

@


%%% NEAR VS. FAR BORDER
\newpage
\subsection{Heterogeneous Effects of Provinces Near vs. Far from Border}
\label{SIsec:hte_nearborder}

This section reports heterogeneous effects by whether provinces are near or far from the international border. \emph{Near} is coded as a border distance of at most 100\,km, which is approximately the median distance in our data, and \emph{far} as a distance greater than 100\,km.

Figure~\ref{fig:Het_NearBorderH1} shows the heterogeneous effects on the main conflict outcomes for provinces far from the border (top plot) and for those near the border (bottom plot). All plots confirm that there is no conflict-increasing effect of refugee presence. In fact, for provinces in the interior of countries (far from the border) there is a negative effect on onset and incidence, which may also be consistent with our theoretical predictions of increased development in these cases.

% H1: Effect of Refugee Presence on Conflict cond. on Proximity to Border

<<HetOnset_NearBorderH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3, with subset data
## Bayesian logit (bayesglm, binomial family with logit link) of onset.n on rtb,
## nearborder and the rtb_nearborder column (presumably the precomputed
## rtb x nearborder product -- TODO confirm in the data prep), plus rtb.other, the
## onset.n_1 term, further covariates, and Country and year factors.
## The border-distance controls are commented out. Only rows with year < 2009 are used.
onset.h1.het.nearborder.sub <- bayesglm(onset.n ~ rtb + 
                       nearborder +
                       rtb_nearborder + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h1.het.nearborder.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_nearborder in place of the
## treatment terms, estimated on plagedrtbsub (again year < 2009 only).
onset.h1.het.nearborder.sub.pla <- bayesglm(onset.n ~ rtb.placebo + 
                       nearborder +
                       rtb.placebo_nearborder + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h1.het.nearborder.sub.pla)

# Generate predicted probs
# pred.bi.int is defined elsewhere in this file; the figure chunk later filters its
# output on the Plot column (values 3 and 6).
onset.h1.het.nearborder.sub.dplot <- pred.bi.int(onset.h1.het.nearborder.sub) #generate predicted probs
onset.h1.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

onset.h1.het.nearborder.sub.pla.dplot <- pred.bi.int(onset.h1.het.nearborder.sub.pla)
onset.h1.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second
plot.onset.h1.het.nearborder.sub <- rbind(onset.h1.het.nearborder.sub.dplot, #combine for ggplot
                                          onset.h1.het.nearborder.sub.pla.dplot)

# save output as Rdata files
#save(plot.onset.h1.het.nearborder.sub, file = "plot.onset.h1.het.nearborder.sub.Rdata")

@

<<HetIncidence_NearBorderH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3, with subset data
## Bayesian logit (bayesglm, binomial family with logit link) of incidence on rtb,
## nearborder and the rtb_nearborder column (presumably the precomputed
## rtb x nearborder product -- TODO confirm in the data prep), plus rtb.other, the
## incidence_1 term, further covariates, and Country and year factors.
## The border-distance controls are commented out. Only rows with year < 2009 are used.
incidence.h1.het.nearborder.sub <- bayesglm(incidence ~ rtb + 
                       nearborder +
                       rtb_nearborder + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h1.het.nearborder.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_nearborder in place of the
## treatment terms, estimated on plagedrtbsub (again year < 2009 only).
incidence.h1.het.nearborder.sub.pla <- bayesglm(incidence ~ rtb.placebo + 
                       nearborder +
                       rtb.placebo_nearborder + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h1.het.nearborder.sub.pla)

# Generate predicted probs
# pred.bi.int is defined elsewhere in this file; the figure chunk later filters its
# output on the Plot column (values 3 and 6).
incidence.h1.het.nearborder.sub.dplot <- pred.bi.int(incidence.h1.het.nearborder.sub) #generate predicted probs
incidence.h1.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h1.het.nearborder.sub.pla.dplot <- pred.bi.int(incidence.h1.het.nearborder.sub.pla)
incidence.h1.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second
plot.incidence.h1.het.nearborder.sub <- rbind(incidence.h1.het.nearborder.sub.dplot, #combine for ggplot
                                          incidence.h1.het.nearborder.sub.pla.dplot)

# save output as Rdata files
#save(plot.incidence.h1.het.nearborder.sub, file = "plot.incidence.h1.het.nearborder.sub.Rdata")

@

<<HetAttack_NearBorderH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

# HTE with nearborder variable, which is based on bdist3 so we remove bdist3 as a control

## Treatment model 3, with subset data
## lm_robust fit of attack on rtb, nearborder and the rtb_nearborder column
## (presumably the precomputed rtb x nearborder product -- TODO confirm in the data
## prep), plus rtb.other, attack_1, further covariates, and Country and year factors.
## Unlike the onset/incidence models, no year filter is applied to geddatasub here.
attack.h1.het.nearborder.sub <- lm_robust(attack ~ rtb + 
                       nearborder + 
                       rtb_nearborder +
                       rtb.other +   
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(attack.h1.het.nearborder.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_nearborder in place of the
## treatment terms, estimated on plagedrtbsub.
attack.h1.het.nearborder.sub.pla <- lm_robust(attack ~ rtb.placebo + 
                       nearborder + 
                       rtb.placebo_nearborder +
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub)

#summary(attack.h1.het.nearborder.sub.pla)


# Generate plotting estimates (the outcome is a linear-model quantity, not a probability).
# pred.lm.int.r is defined elsewhere in this file; the figure chunk selects its rows
# by position (c(1,3) and c(2,4)) after the rbind below.
attack.h1.het.nearborder.sub.dplot <- pred.lm.int.r(attack.h1.het.nearborder.sub)
attack.h1.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

attack.h1.het.nearborder.sub.pla.dplot <- pred.lm.int.r(attack.h1.het.nearborder.sub.pla)
attack.h1.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second; the positional row selection depends on this order
plot.attack.h1.het.nearborder.sub <- rbind(attack.h1.het.nearborder.sub.dplot, #combine for ggplot
                                           attack.h1.het.nearborder.sub.pla.dplot)

@

<<HetBattleDeath_NearBorderH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

# HTE with nearborder variable, which is based on bdist3 so we remove bdist3 as a control

## Treatment model 3, with subset data
## lm_robust fit of log(best+1) on rtb, nearborder and the rtb_nearborder column
## (presumably the precomputed rtb x nearborder product -- TODO confirm in the data
## prep), plus rtb.other, log(best_1+1), further covariates, and Country and year
## factors. No year filter is applied to geddatasub here.
best.h1.het.nearborder.sub <- lm_robust(log(best+1) ~ rtb + 
                       nearborder + 
                       rtb_nearborder +
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum + 
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(best.h1.het.nearborder.sub)

## Placebo model
## Same specification with rtb.placebo and rtb.placebo_nearborder in place of the
## treatment terms, estimated on plagedrtbsub.
best.h1.het.nearborder.sub.pla <- lm_robust(log(best+1) ~ rtb.placebo + 
                       nearborder + 
                       rtb.placebo_nearborder +
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum + 
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                      #log_bdist2 + 
                      #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year), 
               data = plagedrtbsub)

#summary(best.h1.het.nearborder.sub.pla)

# Generate plotting estimates (the outcome is on the log(best+1) scale, not a probability).
# pred.lm.int.r is defined elsewhere in this file; the figure chunk selects its rows
# by position (c(1,3) and c(2,4)) after the rbind below.
best.h1.het.nearborder.sub.dplot <- pred.lm.int.r(best.h1.het.nearborder.sub)
best.h1.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.het.nearborder.sub.pla.dplot <- pred.lm.int.r(best.h1.het.nearborder.sub.pla)
best.h1.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second; the positional row selection depends on this order
plot.best.h1.het.nearborder.sub <- rbind(best.h1.het.nearborder.sub.dplot, #combine for ggplot
                          best.h1.het.nearborder.sub.pla.dplot)

@

<<Het_NearBorderH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence in provinces near vs. far from the border (heterogeneous effects analysis) on conflict outcomes (black), compared to their respective placebo estimates (gray). All point estimates include 95$\\%$ CIs.")>>=

# Near-vs-far-border figure for H1: one row of three panels (onset + incidence,
# violent events, battle deaths) for provinces far from the border, and an
# identical row for provinces near the border, stacked with patchwork.

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h1.het.nearborder.sub.Rdata")
# load("plot.incidence.h1.het.nearborder.sub.Rdata")

# Overwrite the outcome column of each estimate table with one fixed label so that
# the tables can be combined and placed on a discrete x axis.
plot.onset.h1.het.nearborder.sub$outcome <- "onset"
plot.incidence.h1.het.nearborder.sub$outcome <- "incidence"
plot.attack.h1.het.nearborder.sub$outcome <- "attack"
plot.best.h1.het.nearborder.sub$outcome <- "log(best + 1)"

# plots
pd <- position_dodge(0.5) # dodge width of 0.5 so actual and placebo estimates sit side by side

# Build one point-and-CI panel.
#   est       rows to plot; needs the columns outcome, Means, CIL, CIU and Group
#   y_title   y-axis title
#   x_labels  labels for the discrete x axis, in level order
#   y_limits  numeric vector c(lower, upper) for the y axis
#   percent   TRUE formats the y axis as whole percentages (probability outcomes)
# Returns a ggplot object without a legend.
nearborder_h1_panel <- function(est, y_title, x_labels, y_limits, percent = FALSE) {
  # a fresh y scale is built on every call so that panels never share a scale object
  y_scale <- if (percent) {
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = y_limits)
  } else {
    ylim(y_limits[1], y_limits[2])
  }

  ggplot(est, aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    y_scale +
    ylab(y_title) +
    xlab("") +
    scale_x_discrete(labels = x_labels) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") + # "none" replaces the deprecated guides(shape = FALSE)
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)
          )
}

# Combine the onset and incidence estimates whose Plot column equals plot_id and
# put "onset" first on the x axis.
nearborder_h1_onsetincidence <- function(plot_id) {
  bind_rows(plot.onset.h1.het.nearborder.sub[plot.onset.h1.het.nearborder.sub$Plot == plot_id, ],
            plot.incidence.h1.het.nearborder.sub[plot.incidence.h1.het.nearborder.sub$Plot == plot_id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))
}

# Onset and incidence: Plot == 3 is used for the far-from-border row, Plot == 6
# for the near-border row.
PLOT.onsetincidence.h1.het.farborder.sub <-
  nearborder_h1_panel(nearborder_h1_onsetincidence(3),
                      y_title = "Change in Predicted Probability",
                      x_labels = c("Onset",
                                   "Incidence"),
                      y_limits = c(-.05, .05), percent = TRUE)

PLOT.onsetincidence.h1.het.nearborder.sub <-
  nearborder_h1_panel(nearborder_h1_onsetincidence(6),
                      y_title = "Change in Predicted Probability",
                      x_labels = c("Onset",
                                   "Incidence"),
                      y_limits = c(-.05, .05), percent = TRUE)

# Violent events and battle deaths: rows are picked by position, which relies on the
# rbind order of the estimate tables (actual rows first, placebo rows second);
# rows 1 and 3 go to the far-from-border row, rows 2 and 4 to the near-border row.
# NOTE(review): this assumes pred.lm.int.r returns exactly two rows per model -- confirm.
PLOT.attack.h1.het.farborder.sub <-
  nearborder_h1_panel(plot.attack.h1.het.nearborder.sub[c(1, 3), ],
                      y_title = "Change in Predicted Number",
                      x_labels = c("Violent Events"),
                      y_limits = c(-3, 3))

PLOT.attack.h1.het.nearborder.sub <-
  nearborder_h1_panel(plot.attack.h1.het.nearborder.sub[c(2, 4), ],
                      y_title = "Change in Predicted Number",
                      x_labels = c("Violent Events"),
                      y_limits = c(-3, 3))

PLOT.best.h1.het.farborder.sub <-
  nearborder_h1_panel(plot.best.h1.het.nearborder.sub[c(1, 3), ],
                      y_title = "Change in Predicted Number (logged)",
                      x_labels = c("Battle Deaths"),
                      y_limits = c(-.5, .5))

PLOT.best.h1.het.nearborder.sub <-
  nearborder_h1_panel(plot.best.h1.het.nearborder.sub[c(2, 4), ],
                      y_title = "Change in Predicted Number (logged)",
                      x_labels = c("Battle Deaths"),
                      y_limits = c(-.5, .5))

#plot
# NOTE(review): ggtitle()/theme() added to a patchwork are applied to its last plot,
# so the hjust values above 1 look like hand-tuned offsets that pull the title back
# over the whole row -- confirm visually before changing them.
patch1 <- (PLOT.onsetincidence.h1.het.farborder.sub + 
             PLOT.attack.h1.het.farborder.sub + 
             PLOT.best.h1.het.farborder.sub +  
             plot_layout(widths = c(2, 1, 1))) +
             ggtitle("Effect of Refugee Presence in Provinces Far from Border (H1)") + 
              theme(plot.title = element_text(hjust= 2.8, size = 11))

patch2 <- (PLOT.onsetincidence.h1.het.nearborder.sub + 
             PLOT.attack.h1.het.nearborder.sub + 
             PLOT.best.h1.het.nearborder.sub +
             plot_layout(widths = c(2, 1, 1))) +
             ggtitle("Effect of Refugee Presence in Provinces Near Border (H1)") + 
              theme(plot.title = element_text(hjust= 3.1, size = 11))
  
patch1/patch2

@

Figure~\ref{fig:Het_NearBorderH2} shows the heterogeneous effects for provinces far from the border versus those near the border, conditional on refugee presence in other provinces of the same country-year. The top two plots compare far and near border provinces when refugee presence is geographically concentrated, while the bottom two plots do so in geographically dispersed cases. There are no effects of refugee presence in dispersed cases. Additionally, it appears that the conditional risk reduction effects in the cases of concentrated refugee presence are small but present for near-border provinces and prominent for provinces in the interior (far from the border).

% H2: Effect of Concentrated/Dispersed Refugee Presence on Conflict cond. on Proximity to Border
<<HetOnset_NearBorderH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model with rtb nearborder interaction, with subset data
## Bayesian logit (bayesglm, binomial family with logit link) of onset.n on the full
## three-way interaction rtb*rtb.other*nearborder. In an R formula the `*` operator
## already expands to all main effects and lower-order interactions, so the three
## main effects listed first are redundant but harmless. Only rows with year < 2009
## are used.
onset.h2.het.nearborder.sub <- bayesglm(onset.n ~ rtb +
                       rtb.other + 
                       nearborder +
                       rtb*rtb.other*nearborder + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               family = binomial(link = "logit")) 

#summary(onset.h2.het.nearborder.sub)

## Placebo model with rtb nearborder interaction, with subset data
## Same specification with rtb.placebo in place of rtb, estimated on plagedrtbsub
## (again year < 2009 only).
onset.h2.het.nearborder.sub.pla <- bayesglm(onset.n ~ rtb.placebo + 
                       rtb.other + 
                       nearborder +
                       rtb.placebo*rtb.other*nearborder + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                      #log_bdist2 + 
                      #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.het.nearborder.sub.pla)


# Generate predicted probs
# pred.bi.tripint is defined elsewhere in this file; it is given the model, the name
# of the heterogeneity variable, and a flag saying whether the model is the placebo one.
onset.h2.het.nearborder.sub.dplot <- pred.bi.tripint(onset.h2.het.nearborder.sub, "nearborder", placebo = FALSE)
onset.h2.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

onset.h2.het.nearborder.sub.pla.dplot <- pred.bi.tripint(onset.h2.het.nearborder.sub.pla, "nearborder", placebo = TRUE)
onset.h2.het.nearborder.sub.pla.dplot$Group <- "Placebo presence" #label the type of estimate

# Actual rows first, placebo rows second
plot.onset.h2.het.nearborder.sub <- rbind(onset.h2.het.nearborder.sub.dplot, #combine for ggplot
                                          onset.h2.het.nearborder.sub.pla.dplot)

# save output as Rdata files
#save(plot.onset.h2.het.nearborder.sub, file = "plot.onset.h2.het.nearborder.sub.Rdata")

@

<<HetIncidence_NearBorderH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Models behind the H2 near/far-border incidence estimates. The estimates are built
# and combined in this chunk; the save() call at the end is currently commented out.

## Treatment model with rtb nearborder interaction, with subset data
## Bayesian logit (bayesglm, binomial family with logit link) of incidence on the
## full three-way interaction rtb*rtb.other*nearborder. In an R formula the `*`
## operator already expands to all main effects and lower-order interactions, so the
## three main effects listed first are redundant but harmless. Only rows with
## year < 2009 are used.
incidence.h2.het.nearborder.sub <- bayesglm(incidence ~ rtb +
                       rtb.other + 
                       nearborder +
                       rtb*rtb.other*nearborder + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               family = binomial(link = "logit")) 

#summary(incidence.h2.het.nearborder.sub)

## Placebo model with rtb nearborder interaction, with subset data
## Same specification with rtb.placebo in place of rtb, estimated on plagedrtbsub
## (again year < 2009 only).
incidence.h2.het.nearborder.sub.pla <- bayesglm(incidence ~ rtb.placebo + 
                       rtb.other + 
                       nearborder +
                       rtb.placebo*rtb.other*nearborder + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                      #log_bdist2 + 
                      #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.het.nearborder.sub.pla)


# Generate predicted probs
# pred.bi.tripint is defined elsewhere in this file; it is given the model, the name
# of the heterogeneity variable, and a flag saying whether the model is the placebo one.
incidence.h2.het.nearborder.sub.dplot <- pred.bi.tripint(incidence.h2.het.nearborder.sub, "nearborder", placebo = FALSE)
incidence.h2.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h2.het.nearborder.sub.pla.dplot <- pred.bi.tripint(incidence.h2.het.nearborder.sub.pla, "nearborder", placebo = TRUE)
incidence.h2.het.nearborder.sub.pla.dplot$Group <- "Placebo presence" #label the type of estimate

# Actual rows first, placebo rows second
plot.incidence.h2.het.nearborder.sub <- rbind(incidence.h2.het.nearborder.sub.dplot, #combine for ggplot
                                          incidence.h2.het.nearborder.sub.pla.dplot)

# save output as Rdata files
#save(plot.incidence.h2.het.nearborder.sub, file = "plot.incidence.h2.het.nearborder.sub.Rdata")

@

<<HetAttack_NearBorderH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3, with subset data
## lm_robust fit of attack on the full three-way interaction
## rtb*rtb.other*nearborder. In an R formula the `*` operator already expands to all
## main effects and lower-order interactions, so the three main effects listed first
## are redundant but harmless. No year filter is applied to geddatasub here.
attack.h2.het.nearborder.sub <- lm_robust(attack ~ rtb +
                       rtb.other + 
                       nearborder +
                       rtb*rtb.other*nearborder + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub) 

#summary(attack.h2.het.nearborder.sub)

## Placebo model
## Same specification with rtb.placebo in place of rtb, estimated on plagedrtbsub.
attack.h2.het.nearborder.sub.pla <- lm_robust(attack ~ rtb.placebo +
                       rtb.other + 
                       nearborder +
                       rtb.placebo*rtb.other*nearborder + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       #log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub) 

#summary(attack.h2.het.nearborder.sub.pla)

# Generate plotting estimates (the outcome is a linear-model quantity, not a probability).
# pred.lm.tripint.r is defined elsewhere in this file; it is given the model, the
# heterogeneity variable name, and a flag saying whether the model is the placebo one.
attack.h2.het.nearborder.sub.dplot <- pred.lm.tripint.r(attack.h2.het.nearborder.sub, hetvar = "nearborder", placebo = FALSE)
attack.h2.het.nearborder.sub.dplot$Group <- "Actual presence" #label the type of estimate

attack.h2.het.nearborder.sub.pla.dplot <- pred.lm.tripint.r(attack.h2.het.nearborder.sub.pla, hetvar = "nearborder", placebo = TRUE)
attack.h2.het.nearborder.sub.pla.dplot$Group <- "Placebo presence" #label the type of estimate

# Actual rows first, placebo rows second
plot.attack.h2.het.nearborder.sub <- rbind(attack.h2.het.nearborder.sub.dplot, #combine for ggplot
                                           attack.h2.het.nearborder.sub.pla.dplot)

@

<<HetBattleDeath_NearBorderH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the subset data: logged battle deaths regressed on the
## rtb x rtb.other x nearborder interaction plus controls and country/year dummies.
best.h2.het.nearborder.sub <- lm_robust(
  log(best+1) ~ rtb + rtb.other + nearborder +
    rtb*rtb.other*nearborder +
    log(best_1+1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  # Terms switched off in this specification: excluded_mean, log_bdist2, log(bdist3)
  data = geddatasub
)

#summary(best.h2.het.nearborder.sub)

## Placebo counterpart: rtb.placebo replaces rtb and the model is fit on plagedrtbsub.
best.h2.het.nearborder.sub.pla <- lm_robust(
  log(best+1) ~ rtb.placebo + rtb.other + nearborder +
    rtb.placebo*rtb.other*nearborder +
    log(best_1+1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  # Terms switched off in this specification: excluded_mean, log_bdist2, log(bdist3)
  data = plagedrtbsub
)

#summary(best.h2.het.nearborder.sub.pla)

# Pass both fits through pred.lm.tripint.r (heterogeneity variable: nearborder)
# and label each resulting table with the kind of estimate it holds.
best.h2.het.nearborder.sub.dplot <- pred.lm.tripint.r(
  best.h2.het.nearborder.sub,
  hetvar = "nearborder",
  placebo = FALSE
)
best.h2.het.nearborder.sub.dplot$Group <- "Actual presence"

best.h2.het.nearborder.sub.pla.dplot <- pred.lm.tripint.r(
  best.h2.het.nearborder.sub.pla,
  hetvar = "nearborder",
  placebo = TRUE
)
best.h2.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo rows into the single table that ggplot consumes.
plot.best.h2.het.nearborder.sub <- rbind(
  best.h2.het.nearborder.sub.dplot,
  best.h2.het.nearborder.sub.pla.dplot
)

@

<<Het_NearBorderH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 12, out.width= ".95\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect  of refugee presence in provinces near vs. far from the border (heterogeneous effects analysis) on conflict outcomes (black) compared to their respective placebo estimates (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData 
# load("plot.onset.h2.het.nearborder.sub.Rdata")
# load("plot.incidence.h2.het.nearborder.sub.Rdata")

# Tag each estimate table with its outcome; `outcome` is the x-axis variable of
# every panel below.
plot.onset.h2.het.nearborder.sub$outcome <- "onset"
plot.incidence.h2.het.nearborder.sub$outcome <- "incidence"
plot.attack.h2.het.nearborder.sub$outcome <- "attack"
plot.best.h2.het.nearborder.sub$outcome <- "log(best + 1)"

# Dodge (width 0.5) so the actual and placebo estimates sit side by side
# instead of overplotting.
pd <- position_dodge(0.5)

# Values of the `Plot` column used for each row of the figure:
#   2 = far from border, concentrated (H2a)   4 = far from border, dispersed (H2b)
#   1 = near border, concentrated (H2a)       3 = near border, dispersed (H2b)

# Build one point-and-error-bar panel.
#   est         rows to draw; needs columns outcome, Means, CIL, CIU and Group
#   y.scale     ggplot2 y scale for the panel (e.g. ylim(-7, 7))
#   y.lab       y-axis title
#   x.labels    labels for the discrete x axis
#   show.legend TRUE draws the legend inside the top-right corner of the panel;
#               FALSE (default) suppresses it
het.border.panel <- function(est, y.scale, y.lab, x.labels, show.legend = FALSE) {
  if (show.legend) {
    panel.theme <- theme(panel.background = element_blank(),
                         panel.border = element_rect(colour = "gray", fill=NA, size=.8),
                         legend.title = element_blank(),
                         legend.justification = c(1, 1),
                         legend.position= c(.99, .99),
                         legend.box = "horizontal",
                         legend.direction = "vertical",
                         legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    panel.theme <- theme(panel.background = element_blank(),
                         legend.position = "none",
                         panel.border = element_rect(colour = "gray", fill=NA, size=.8))
  }

  ggplot(est, aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU go through as.character() first so factor or character columns
    # convert to their printed values
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    y.scale +
    ylab(y.lab) +
    xlab("") +
    scale_x_discrete(labels = x.labels) +
    scale_colour_manual(values = c("black", "gray60")) +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    panel.theme
}

# Onset + incidence panel for one value of `Plot`; "onset" is forced to be the
# first factor level so it is drawn to the left of "incidence".
het.border.onsetincidence.panel <- function(plot.id, show.legend = FALSE) {
  est <- bind_rows(
    plot.onset.h2.het.nearborder.sub[plot.onset.h2.het.nearborder.sub$Plot == plot.id, ],
    plot.incidence.h2.het.nearborder.sub[plot.incidence.h2.het.nearborder.sub$Plot == plot.id, ]
  ) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))

  het.border.panel(est,
                   y.scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                                                limits = c(-.2, .2)),
                   y.lab = "Change in Predicted Probability",
                   x.labels = c("Onset", "Incidence"),
                   show.legend = show.legend)
}

# Violent-events panel for one value of `Plot`.
het.border.attack.panel <- function(plot.id) {
  het.border.panel(plot.attack.h2.het.nearborder.sub[plot.attack.h2.het.nearborder.sub$Plot == plot.id, ],
                   y.scale = ylim(-7, 7),
                   y.lab = "Change in Predicted Number",
                   x.labels = c("Violent Events"))
}

# Logged battle-deaths panel for one value of `Plot`.
het.border.best.panel <- function(plot.id) {
  het.border.panel(plot.best.h2.het.nearborder.sub[plot.best.h2.het.nearborder.sub$Plot == plot.id, ],
                   y.scale = ylim(-.8, .8),
                   y.lab = "Change in Predicted Number (logged)",
                   x.labels = c("Battle Deaths"))
}

# FAR BORDER: h2a is concentrated, h2b is dispersed
# (only the first panel carries the legend for the whole figure)
PLOT.onsetincidence.h2a.het.farborder.sub <- het.border.onsetincidence.panel(2, show.legend = TRUE)
PLOT.onsetincidence.h2b.het.farborder.sub <- het.border.onsetincidence.panel(4)
PLOT.attack.h2a.het.farborder.sub <- het.border.attack.panel(2)
PLOT.attack.h2b.het.farborder.sub <- het.border.attack.panel(4)
PLOT.best.h2a.het.farborder.sub <- het.border.best.panel(2)
PLOT.best.h2b.het.farborder.sub <- het.border.best.panel(4)

# NEAR BORDER: h2a is concentrated, h2b is dispersed
PLOT.onsetincidence.h2a.het.nearborder.sub <- het.border.onsetincidence.panel(1)
PLOT.onsetincidence.h2b.het.nearborder.sub <- het.border.onsetincidence.panel(3)
PLOT.attack.h2a.het.nearborder.sub <- het.border.attack.panel(1)
PLOT.attack.h2b.het.nearborder.sub <- het.border.attack.panel(3)
PLOT.best.h2a.het.nearborder.sub <- het.border.best.panel(1)
PLOT.best.h2b.het.nearborder.sub <- het.border.best.panel(3)


# One row per (distance to border, concentration) combination: the titled
# onset/incidence panel followed by the violent-events and battle-deaths panels.
patch1.farborder <- (PLOT.onsetincidence.h2a.het.farborder.sub + 
                      ggtitle("Effect of Concentrated Presence Far from Border (H2a)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) +
  (PLOT.attack.h2a.het.farborder.sub + PLOT.best.h2a.het.farborder.sub) 

patch1.nearborder <- (PLOT.onsetincidence.h2a.het.nearborder.sub + 
                        ggtitle("Effect of Concentrated Presence Near Border (H2a)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) +
  (PLOT.attack.h2a.het.nearborder.sub + PLOT.best.h2a.het.nearborder.sub) 

patch2.farborder <- (PLOT.onsetincidence.h2b.het.farborder.sub + 
                      ggtitle("Effect of Dispersed Presence Far from Border (H2b)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) +
  (PLOT.attack.h2b.het.farborder.sub + PLOT.best.h2b.het.farborder.sub) 

patch2.nearborder <- (PLOT.onsetincidence.h2b.het.nearborder.sub + 
                        ggtitle("Effect of Dispersed Presence Near Border (H2b)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) +
  (PLOT.attack.h2b.het.nearborder.sub + PLOT.best.h2b.het.nearborder.sub) 

# Stack the four rows vertically; this expression is the chunk's figure output.
patch1.farborder/
patch1.nearborder/
patch2.farborder/
patch2.nearborder

@

Figure~\ref{fig:Het_NearBorderH3} shows the heterogeneous effects on nighttime lights for provinces near and far from the border, conditional on refugee presence in other provinces of the same country-year. The top two plots compare provinces far from and near the border when refugee presence is geographically concentrated, while the bottom two plots do so when refugee presence is geographically dispersed. There are no effects of refugee presence in dispersed cases. There is a positive and statistically significant effect of refugee presence for provinces far from the border (in the interior) when refugee presence is geographically concentrated. This appears supportive of our argument and other findings on development. 

% H3: Effect of Concentrated/Dispersed Refugee Presence on Nighttime Lights cond. on Proximity to Border

<<HetNightLights_NearBorderH3, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the subset data: calibrated mean nighttime lights
## regressed on the rtb x rtb.other x nearborder interaction plus controls and
## country/year dummies.
nlights_calib_mean.h3.het.nearborder.sub <- lm_robust(
  nlights_calib_mean ~ rtb + rtb.other + nearborder +
    rtb*rtb.other*nearborder +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  # Terms switched off in this specification: excluded_mean, log_bdist2, log(bdist3)
  data = geddatasub
)

#summary(nlights_calib_mean.h3.het.nearborder.sub)

## Placebo counterpart: rtb.placebo replaces rtb and the model is fit on plagedrtbsub.
nlights_calib_mean.h3.het.nearborder.sub.pla <- lm_robust(
  nlights_calib_mean ~ rtb.placebo + rtb.other + nearborder +
    rtb.placebo*rtb.other*nearborder +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  # Terms switched off in this specification: excluded_mean, log_bdist2, log(bdist3)
  data = plagedrtbsub
)

#summary(nlights_calib_mean.h3.het.nearborder.sub.pla)

# Pass both fits through pred.lm.tripint.r (heterogeneity variable: nearborder)
# and label each resulting table with the kind of estimate it holds.
nlights_calib_mean.h3.het.nearborder.sub.dplot <- pred.lm.tripint.r(
  nlights_calib_mean.h3.het.nearborder.sub,
  hetvar = "nearborder",
  placebo = FALSE
)
nlights_calib_mean.h3.het.nearborder.sub.dplot$Group <- "Actual presence"

nlights_calib_mean.h3.het.nearborder.sub.pla.dplot <- pred.lm.tripint.r(
  nlights_calib_mean.h3.het.nearborder.sub.pla,
  hetvar = "nearborder",
  placebo = TRUE
)
nlights_calib_mean.h3.het.nearborder.sub.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo rows into the single table that ggplot consumes.
plot.nlights_calib_mean.h3.het.nearborder.sub <- rbind(
  nlights_calib_mean.h3.het.nearborder.sub.dplot,
  nlights_calib_mean.h3.het.nearborder.sub.pla.dplot
)

@

<<Het_NearBorderH3, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect  of refugee presence in provinces near vs. far from the border (heterogeneous effects analysis) on nightlights (black) compared to their respective placebo estimates (gray), conditional on refugee presence in other provinces of the same country-year. All point estimates include 95$\\%$ CIs.")>>=

# Tag the estimate table with its outcome; `outcome` is the x-axis variable of
# every panel below.
plot.nlights_calib_mean.h3.het.nearborder.sub$outcome <- "nighttime lights"

# Dodge (width 0.5) so the actual and placebo estimates sit side by side
# instead of overplotting.
pd <- position_dodge(0.5)

# Build the nighttime-lights panel for one value of the `Plot` column.
#   plot.id     value of `Plot` selecting the rows to draw
#               (2 = far/concentrated, 4 = far/dispersed,
#                1 = near/concentrated, 3 = near/dispersed)
#   show.legend TRUE draws the legend inside the top-right corner of the panel;
#               FALSE (default) suppresses it
nlights.border.panel <- function(plot.id, show.legend = FALSE) {
  est <- plot.nlights_calib_mean.h3.het.nearborder.sub[
    plot.nlights_calib_mean.h3.het.nearborder.sub$Plot == plot.id, ]

  if (show.legend) {
    panel.theme <- theme(panel.background = element_blank(),
                         panel.border = element_rect(colour = "gray", fill=NA, size=.8),
                         legend.title = element_blank(),
                         legend.justification = c(1, 1),
                         legend.position= c(.99, .99),
                         legend.box = "horizontal",
                         legend.direction = "vertical",
                         legend.key = element_rect(colour = "transparent", fill = "white"))
  } else {
    panel.theme <- theme(panel.background = element_blank(),
                         legend.position = "none",
                         panel.border = element_rect(colour = "gray", fill=NA, size=.8))
  }

  ggplot(est, aes(x = outcome, y = Means, group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU go through as.character() first so factor or character columns
    # convert to their printed values
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(-.25, .25) +
    ylab("Change in Predicted Lights") +
    xlab("") +
    scale_x_discrete(labels = c("Average Nighttime Lights")) +
    scale_colour_manual(values = c("black", "gray60")) +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    panel.theme
}

# NOTE: the object names keep the "h2a"/"h2b" infix, but these panels show
# H3a (concentrated) and H3b (dispersed), as the titles below state.

# FAR BORDER: h2a is concentrated, h2b is dispersed
# (only the first panel carries the legend for the whole figure)
PLOT.nlights_calib_mean.h2a.het.farborder.sub <- nlights.border.panel(2, show.legend = TRUE)
PLOT.nlights_calib_mean.h2b.het.farborder.sub <- nlights.border.panel(4)

# NEAR BORDER: h2a is concentrated, h2b is dispersed
PLOT.nlights_calib_mean.h2a.het.nearborder.sub <- nlights.border.panel(1)
PLOT.nlights_calib_mean.h2b.het.nearborder.sub <- nlights.border.panel(3)

patch1.farborder <- (PLOT.nlights_calib_mean.h2a.het.farborder.sub + 
                      ggtitle("Effect of Concentrated Presence Far from Border (H3a)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) 

patch1.nearborder <- (PLOT.nlights_calib_mean.h2a.het.nearborder.sub + 
                        ggtitle("Effect of Concentrated Presence Near Border (H3a)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) 

patch2.farborder <- (PLOT.nlights_calib_mean.h2b.het.farborder.sub + 
                      ggtitle("Effect of Dispersed Presence Far from Border (H3b)") + 
  theme(plot.title = element_text(hjust= 0, size = 11)))

patch2.nearborder <- (PLOT.nlights_calib_mean.h2b.het.nearborder.sub + 
                        ggtitle("Effect of Dispersed Presence Near Border (H3b)") + 
  theme(plot.title = element_text(hjust= 0, size = 11))) 

# 2 x 2 layout: concentrated presence on top, dispersed below; far-from-border
# on the left, near-border on the right. This expression is the chunk's figure.
(patch1.farborder + patch1.nearborder) /
(patch2.farborder + patch2.nearborder)

@


%%% BY REGION
\newpage
\subsection{Subgroup Effects by Region}
\label{SIsec:hte_region}

% NOTE(review): 14 regions minus the 6 excluded leaves 8, but the text reports 7 -- please confirm the counts.
There are 14 regions in our data, but 6 of these regions either had only 1 country that hosted refugees or did not have enough observations for either the actual analysis or the placebo version. Thus, we end up with 7 regions for subgroup analysis. 
Of these 7 regions, most show null effects. However, we observe a statistically significant positive effect of refugee presence on battle deaths (as well as positive estimates that do not reach statistical significance for the other three outcomes) in Southern Asia -- Afghanistan, Bangladesh, India, Iran, Nepal, and Pakistan. On the other hand, we observe a statistically significant negative effect of refugee presence on battle deaths in Western Asia, which includes countries that host Syrian refugees, i.e., Turkey and Jordan. 

We do not carry out additional subgroup analysis for H2 (concentrated vs. dispersed presence on conflict outcomes) or H3 (concentrated vs. dispersed presence on nighttime lights) because once we subset further for HTE, there is not enough data to analyze results in a meaningful way. Nevertheless, showing these subgroup effects for H1 is useful for understanding the overall null effect; in some areas there is a null effect, while in other areas, namely Southern Asia and Western Asia, the null masks positive and negative effects. 

<<ByRegionH1_analysis, eval=FALSE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Subgroup (by-region) analysis for H1.
# For every retained region this chunk re-fits the four conflict models
# (onset, incidence, violent events, battle deaths), once with the actual
# treatment (rtb, on geddatasub) and once with the placebo treatment
# (rtb.placebo, on plagedrtbsub), keeps only the treatment estimate of each
# model, and saves the stacked per-region estimates for the ByRegionH1 plot.

# Regions with more than one country in geddatasub.
sub_regions_gt1 <- geddatasub %>% 
  group_by(REGION) %>%
  summarize(num_countries = length(unique(Country))) %>%
  filter(num_countries > 1) %>% 
  pull(REGION)

# Four further regions are dropped by name -- presumably those without enough
# observations in geddatasub or plagedrtbsub (TODO confirm against the data).
regions <- sub_regions_gt1[!(sub_regions_gt1 %in% c("Southern Europe", "Southern Africa", 
                                                    "Eastern Europe", "Central Asia"))]

# Row of a tidied model that belongs to the coefficient `term`.
extract_coef_row <- function(model, term) {
  coefs <- tidy(model)
  coefs[coefs$term == term, ]
}

# Tag an estimate with the region it was fitted on and the type of estimate.
label_estimate <- function(estimate, region, group) {
  estimate$Region <- region
  estimate$Group <- group
  estimate
}

# Leave one core free, but never register fewer than one worker
# (detectCores() can return 1 or NA).
registerDoMC(max(1, detectCores() - 1, na.rm = TRUE))

# seq_along() (rather than 1:length()) so an empty `regions` runs zero iterations.
byregion_results <- foreach(i = seq_along(regions)) %dopar% {
  
  geddata.byregion <- geddatasub[geddatasub$REGION == regions[i],]
  plagedrtb.byregion <- plagedrtbsub[plagedrtbsub$REGION == regions[i],]
  
  # run the main conflict models
  
  # Onset (restricted to years before 2009)
  onset.h1.sub.byregion <- bayesglm(onset.n ~ rtb + #treatment model
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.byregion[geddata.byregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  onset.h1.sub.byregion.pla <- bayesglm(onset.n ~ rtb.placebo + #placebo model
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtb.byregion[plagedrtb.byregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

  # Incidence (restricted to years before 2009)
  incidence.h1.sub.byregion <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.byregion[geddata.byregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  incidence.h1.sub.byregion.pla <- bayesglm(incidence ~ rtb.placebo + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtb.byregion[plagedrtb.byregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  # Violent events (all years)
  attack.h1.sub.byregion <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.byregion)
  
  attack.h1.sub.byregion.pla <- lm_robust(attack ~ rtb.placebo + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtb.byregion)
  
  # Battle deaths (logged, all years)
  best.h1.sub.byregion <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = geddata.byregion)
  
  best.h1.sub.byregion.pla <- lm_robust(log(best+1) ~ rtb.placebo + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = plagedrtb.byregion)
  
  # Main estimate of each model. For the logit models this is row 3 of the
  # pred.bi() output (presumably the change in predicted probability -- confirm
  # against pred.bi); for the linear models it is the treatment coefficient.
  region_name <- as.character(regions[i])
  
  onset.h1.sub.byregion.dplot <- pred.bi(onset.h1.sub.byregion)[3,]
  onset.h1.sub.byregion.pla.dplot <- pred.bi(onset.h1.sub.byregion.pla)[3,]

  incidence.h1.sub.byregion.dplot <- pred.bi(incidence.h1.sub.byregion)[3,]
  incidence.h1.sub.byregion.pla.dplot <- pred.bi(incidence.h1.sub.byregion.pla)[3,]
  
  attack.h1.sub.byregion.dplot <- extract_coef_row(attack.h1.sub.byregion, "rtb")
  attack.h1.sub.byregion.pla.dplot <- extract_coef_row(attack.h1.sub.byregion.pla, "rtb.placebo")
  
  best.h1.sub.byregion.dplot <- extract_coef_row(best.h1.sub.byregion, "rtb")
  best.h1.sub.byregion.pla.dplot <- extract_coef_row(best.h1.sub.byregion.pla, "rtb.placebo")
  
  # Return order matters: the extraction below indexes this list by position.
  return(list(label_estimate(onset.h1.sub.byregion.dplot, region_name, "Actual presence"),
              label_estimate(onset.h1.sub.byregion.pla.dplot, region_name, "Placebo presence"),
              label_estimate(incidence.h1.sub.byregion.dplot, region_name, "Actual presence"),
              label_estimate(incidence.h1.sub.byregion.pla.dplot, region_name, "Placebo presence"),
              label_estimate(attack.h1.sub.byregion.dplot, region_name, "Actual presence"),
              label_estimate(attack.h1.sub.byregion.pla.dplot, region_name, "Placebo presence"),
              label_estimate(best.h1.sub.byregion.dplot, region_name, "Actual presence"),
              label_estimate(best.h1.sub.byregion.pla.dplot, region_name, "Placebo presence")))
}

# Stack the per-region estimates: one data frame per outcome and estimate type.
onset.h1.sub.byregion.effect <- bind_rows(lapply(byregion_results, `[[`, 1))
onset.h1.sub.byregion.pla.effect <- bind_rows(lapply(byregion_results, `[[`, 2))

incidence.h1.sub.byregion.effect <- bind_rows(lapply(byregion_results, `[[`, 3))
incidence.h1.sub.byregion.pla.effect <- bind_rows(lapply(byregion_results, `[[`, 4))

attack.h1.sub.byregion.effect <- bind_rows(lapply(byregion_results, `[[`, 5))
attack.h1.sub.byregion.pla.effect <- bind_rows(lapply(byregion_results, `[[`, 6))

best.h1.sub.byregion.effect <- bind_rows(lapply(byregion_results, `[[`, 7))
best.h1.sub.byregion.pla.effect <- bind_rows(lapply(byregion_results, `[[`, 8))

# reorder Regions (fixes the left-to-right order on the plot's x axis)
regionorder <- c("Southeastern Asia", "Southern Asia", "Western Asia",  
                 "Western Africa", "Northern Africa", "Middle Africa", "Eastern Africa")

onset.h1.sub.byregion.effect$Region <- fct_relevel(onset.h1.sub.byregion.effect$Region, regionorder)
onset.h1.sub.byregion.pla.effect$Region <- fct_relevel(onset.h1.sub.byregion.pla.effect$Region, regionorder)

incidence.h1.sub.byregion.effect$Region <- fct_relevel(incidence.h1.sub.byregion.effect$Region, regionorder)
incidence.h1.sub.byregion.pla.effect$Region <- fct_relevel(incidence.h1.sub.byregion.pla.effect$Region, regionorder)

attack.h1.sub.byregion.effect$Region <- fct_relevel(attack.h1.sub.byregion.effect$Region, regionorder)
attack.h1.sub.byregion.pla.effect$Region <- fct_relevel(attack.h1.sub.byregion.pla.effect$Region, regionorder)

best.h1.sub.byregion.effect$Region <- fct_relevel(best.h1.sub.byregion.effect$Region, regionorder)
best.h1.sub.byregion.pla.effect$Region <- fct_relevel(best.h1.sub.byregion.pla.effect$Region, regionorder)
  
# save as Rdata -- written to Paper_Inputs/, the directory the ByRegionH1
# plotting chunk reads these files from (same convention as the drop-country
# robustness chunks).
save(onset.h1.sub.byregion.effect, file = "Paper_Inputs/onset.h1.sub.byregion.effect.Rdata")
save(onset.h1.sub.byregion.pla.effect, file = "Paper_Inputs/onset.h1.sub.byregion.pla.effect.Rdata")

save(incidence.h1.sub.byregion.effect, file = "Paper_Inputs/incidence.h1.sub.byregion.effect.Rdata")
save(incidence.h1.sub.byregion.pla.effect, file = "Paper_Inputs/incidence.h1.sub.byregion.pla.effect.Rdata")

save(attack.h1.sub.byregion.effect, file = "Paper_Inputs/attack.h1.sub.byregion.effect.Rdata")
save(attack.h1.sub.byregion.pla.effect, file = "Paper_Inputs/attack.h1.sub.byregion.pla.effect.Rdata")

save(best.h1.sub.byregion.effect, file = "Paper_Inputs/best.h1.sub.byregion.effect.Rdata")
save(best.h1.sub.byregion.pla.effect, file = "Paper_Inputs/best.h1.sub.byregion.pla.effect.Rdata")

@

<<ByRegionH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 11.5, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure shows the effect of refugee presence by region (subgroup effects analysis) on conflict outcomes (black), compared to their respective placebo estimates (gray). All point estimates include 95$\\%$ CIs.")>>=

# Plot the by-region (subgroup) estimates for H1: one panel per conflict
# outcome, actual estimates in black and placebo estimates in gray.

# bring in data
# Files are read via explicit paths instead of setwd("Paper_Inputs"), so the
# chunk no longer changes the working directory that knitr is running in.
load("Paper_Inputs/onset.h1.sub.byregion.effect.Rdata")
load("Paper_Inputs/onset.h1.sub.byregion.pla.effect.Rdata")
load("Paper_Inputs/incidence.h1.sub.byregion.effect.Rdata")
load("Paper_Inputs/incidence.h1.sub.byregion.pla.effect.Rdata")
load("Paper_Inputs/attack.h1.sub.byregion.effect.Rdata")
load("Paper_Inputs/attack.h1.sub.byregion.pla.effect.Rdata")
load("Paper_Inputs/best.h1.sub.byregion.effect.Rdata")
load("Paper_Inputs/best.h1.sub.byregion.pla.effect.Rdata")

# Dodge width of 0.5 so the actual and placebo estimates of a region sit side
# by side instead of on top of each other.
pd <- position_dodge(0.5)

# Theme shared by the four panels: blank background, gray border, and an
# untitled legend anchored in the top-right corner of the panel.
theme_byregion <- theme(panel.background = element_blank(),
                        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
                        legend.title = element_blank(),
                        legend.justification = c(1, 1), 
                        legend.position= c(.99, .99),
                        legend.box = "horizontal",
                        legend.direction = "vertical", 
                        legend.key = element_rect(colour = "transparent", fill = "white"))

# Build one panel.
#   actual, placebo : per-region estimate data frames (stacked before plotting)
#   point_aes       : aes() mapping for the point estimates
#   range_aes       : aes() mapping (ymin/ymax) for the confidence intervals
#   y_label, title  : y-axis label and panel title
#   y_scale         : the scale_y_continuous() to apply
# NOTE(review): limits set on the y scale drop points/intervals that fall
# outside the range rather than clipping them; consider
# coord_cartesian(ylim = ...) if wide CIs should remain visible.
plot_byregion_effect <- function(actual, placebo, point_aes, range_aes,
                                 y_label, title, y_scale) {
  bind_rows(actual, placebo) %>%
    ggplot(point_aes) + 
    geom_point(position = pd, size = 3) +
    geom_errorbar(range_aes, width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylab(y_label) +
    xlab("") +
    ggtitle(title) +
    y_scale +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = "none") +  # "none" replaces the deprecated shape = FALSE
    theme_byregion
}

# Logit outcomes (onset, incidence) store the estimate in Means with CIL/CIU;
# linear outcomes (violent events, battle deaths) use the tidy() columns
# estimate with conf.low/conf.high.
aes_prob <- aes(x = Region, y = Means, group = Group, colour = Group)
aes_prob_ci <- aes(ymin = as.numeric(as.character(CIL)), 
                   ymax = as.numeric(as.character(CIU)))
aes_lm <- aes(x = Region, y = estimate, group = Group, colour = Group)
aes_lm_ci <- aes(ymin = as.numeric(as.character(conf.low)), 
                 ymax = as.numeric(as.character(conf.high)))

# Onset across regions
PLOT.onset.h1.sub.byregion <- plot_byregion_effect(
  onset.h1.sub.byregion.effect, onset.h1.sub.byregion.pla.effect,
  aes_prob, aes_prob_ci,
  y_label = "Change in Predicted Probability",
  title = "Effect of Refugee Presence on Conflict Onset (H1)",
  y_scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.3, .3)))

# Incidence across regions
PLOT.incidence.h1.sub.byregion <- plot_byregion_effect(
  incidence.h1.sub.byregion.effect, incidence.h1.sub.byregion.pla.effect,
  aes_prob, aes_prob_ci,
  y_label = "Change in Predicted Probability",
  title = "Effect of Refugee Presence on Conflict Incidence (H1)",
  y_scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.3, .3)))

# Violent events across regions
PLOT.attack.h1.sub.byregion <- plot_byregion_effect(
  attack.h1.sub.byregion.effect, attack.h1.sub.byregion.pla.effect,
  aes_lm, aes_lm_ci,
  y_label = "Change in Predicted Number",
  title = "Effect of Refugee Presence on Violent Events (H1)",
  y_scale = scale_y_continuous(limits = c(-10, 10)))

# Battle deaths across regions
PLOT.best.h1.sub.byregion <- plot_byregion_effect(
  best.h1.sub.byregion.effect, best.h1.sub.byregion.pla.effect,
  aes_lm, aes_lm_ci,
  y_label = "Change in Predicted Number (logged)",
  title = "Effect of Refugee Presence on Battle Deaths (H1)",
  y_scale = scale_y_continuous(limits = c(-1, 1)))

# Plot: stack the four panels vertically

PLOT.onset.h1.sub.byregion /
PLOT.incidence.h1.sub.byregion /
PLOT.attack.h1.sub.byregion /
PLOT.best.h1.sub.byregion

@



%%%%% ROBUSTNESS CHECKS 
\clearpage 
\newpage

\section{Robustness Checks}
\label{SIsec:Robustness}

Here, we detail the following robustness checks for the conflict analyses included in the paper: (1) dropping one country at a time, (2) dropping one region at a time, (3) using the full dataset of all countries, (4) using a dynamically subsetted dataset of only countries that have already hosted refugee sites at time $t$, (5) using 5-year lead dependent variables to confirm that there are no delayed effects of refugee presence on conflict, and (6) replacing our {\tt Conflict Onset} and {\tt Conflict Incidence} outcomes (1990--2008) from \citet{hallberg2011prio} with wzoneData, conflict polygon data constructed by \citet{Kikuta:2020} using machine learning based on UCDP GED 19.1 conflict events data (1990--2018).

%% Dropping One Country at a Time
\subsection{Drop One Country at a Time}
\label{SIsec:DropCountryAnalysis}

\subsubsection{Robustness of the Null Effect of Refugee Presence (H1)}

Figure~\ref{fig:DropCountryH1} shows the distribution of the main effect of the presence of refugee sites on conflict outcomes after dropping one country at a time. There are a total of \Sexpr{length(unique(geddatasub$Country))} countries in the main data. All plots show that the estimates generated when dropping a country cluster tightly around the actual estimate, in which no country is dropped, confirming that our findings are not due to outlier countries. 

<<DropCountryAnalysisH1_dontrun, eval=FALSE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Leave-one-country-out robustness check for H1.
# For every country in geddatasub the four conflict models are re-fitted on
# the data without that country; the treatment (rtb) estimate of each fit is
# kept, tagged with the dropped country, and saved for the DropCountryH1 plot.

countries <- unique(geddatasub$Country) #list of countries in the data

# Row of a tidied model that belongs to the coefficient `term`.
extract_coef_row <- function(model, term) {
  coefs <- tidy(model)
  coefs[coefs$term == term, ]
}

# Leave one core free, but never register fewer than one worker
# (detectCores() can return 1 or NA).
registerDoMC(max(1, detectCores() - 1, na.rm = TRUE))

# seq_along() (rather than 1:length()) so an empty `countries` runs zero iterations.
dropcountry_results <- foreach(i = seq_along(countries)) %dopar% {
  
  # all observations except those of the i-th country
  geddata.dropcountry <- geddatasub[geddatasub$Country != countries[i],]

  # run the main conflict models
  
  # Onset (restricted to years before 2009)
  onset.h1.sub.dropcountry <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry[geddata.dropcountry$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

  # Incidence (restricted to years before 2009)
  incidence.h1.sub.dropcountry <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry[geddata.dropcountry$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  # Violent events (all years)
  attack.h1.sub.dropcountry <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry)
  
  # Battle deaths (logged, all years)
  best.h1.sub.dropcountry <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = geddata.dropcountry)
  
  # Main estimate of each model. For the logit models this is row 3 of the
  # pred.bi() output (presumably the change in predicted probability -- confirm
  # against pred.bi); for the linear models it is the rtb coefficient.
  onset.h1.sub.dropcountry.dplot <- pred.bi(onset.h1.sub.dropcountry)[3,]
  incidence.h1.sub.dropcountry.dplot <- pred.bi(incidence.h1.sub.dropcountry)[3,]
  attack.h1.sub.dropcountry.dplot <- extract_coef_row(attack.h1.sub.dropcountry, "rtb")
  best.h1.sub.dropcountry.dplot <- extract_coef_row(best.h1.sub.dropcountry, "rtb")
  
  #label each estimate with the country that was dropped
  dropped_country <- as.character(countries[i])
  onset.h1.sub.dropcountry.dplot$Country <- dropped_country
  incidence.h1.sub.dropcountry.dplot$Country <- dropped_country
  attack.h1.sub.dropcountry.dplot$Country <- dropped_country
  best.h1.sub.dropcountry.dplot$Country <- dropped_country
  
  # Return order matters: the extraction below indexes this list by position.
  return(list(onset.h1.sub.dropcountry.dplot, incidence.h1.sub.dropcountry.dplot, 
              attack.h1.sub.dropcountry.dplot, best.h1.sub.dropcountry.dplot))
}

# Stack the per-country estimates: one data frame per outcome.
onset.h1.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 1))
incidence.h1.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 2))
attack.h1.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 3))
best.h1.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 4))

# save as Rdata
save(onset.h1.sub.dropcountry.effect, file = "Paper_Inputs/onset.h1.sub.dropcountry.effect.Rdata")
save(incidence.h1.sub.dropcountry.effect, file = "Paper_Inputs/incidence.h1.sub.dropcountry.effect.Rdata")
save(attack.h1.sub.dropcountry.effect, file = "Paper_Inputs/attack.h1.sub.dropcountry.effect.Rdata")
save(best.h1.sub.dropcountry.effect, file = "Paper_Inputs/best.h1.sub.dropcountry.effect.Rdata")

@

<<DropCountryH1, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 5.8, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap="This figure shows the distribution of the main effect of the presence of refugee sites on conflict outcomes after dropping one country at a time compared to the actual estimate (no dropped countries) with 95$\\%$ CIs. These estimates are grouped tightly around the actual estimates and do not substantively change the main results.">>=

# Density of the leave-one-country-out estimates for each H1 outcome, with the
# full-sample ("actual") estimate and its CI drawn on top for comparison.

# bring in data
load("Paper_Inputs/onset.h1.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/incidence.h1.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/attack.h1.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/best.h1.sub.dropcountry.effect.Rdata")

# NOTE(review): plot.onset.h1.sub, plot.incidence.h1.sub, plot.attack.h1.sub
# and plot.best.h1.sub are not loaded here (the load() calls below are
# commented out) -- presumably they are still in memory from an earlier chunk;
# confirm.
# load("Paper_Inputs/plot.onset.h1.sub.Rdata")
# load("Paper_Inputs/plot.incidence.h1.sub.Rdata")

# Theme shared by the four panels.
theme_dropcountry <- theme(panel.background = element_blank(),
                           legend.position = "none",
                           panel.border = element_rect(colour = "gray", fill=NA, size=.8))

# The actual estimate, its CI and its label are single fixed marks, so they are
# drawn with annotate(): geom_point()/geom_segment()/geom_text() would redraw
# the same mark once per row of the plot data.
# Indexing of the actual estimates: for the logit outcomes row 3 with columns
# 1 / 4 / 3 is presumably Means / CIL / CIU; for the linear outcomes row 1 with
# columns 2 / 6 / 7 is presumably estimate / conf.low / conf.high (TODO confirm).

# Onset (subset data) drop country plot
PLOT.onset.h1.sub.dropcountry.effect <- ggplot(onset.h1.sub.dropcountry.effect) + 
  geom_density(aes(x = Means, y = ..density../sum(..density..)), alpha=.2, fill="black") +
  xlim(-.05, .01) +
  ylab("Density") +
  xlab("Change in Predicted Probability") +
  ggtitle("Distribution of Effect of Refugee\nPresence on Onset (H1)") +
  geom_vline(aes(xintercept=0)) +
  theme_dropcountry +
  annotate("point", x = plot.onset.h1.sub[3,1], y = .025, size = 2) +
  annotate("segment", x = plot.onset.h1.sub[3,4],
           xend = plot.onset.h1.sub[3,3], y = .025, yend = .025) +
  annotate("text", x = -.03, y = .035, label = "Actual estimate")

# Incidence (subset data) drop country plot
PLOT.incidence.h1.sub.dropcountry.effect <- ggplot(incidence.h1.sub.dropcountry.effect) + 
  geom_density(aes(x = Means, y = ..density../sum(..density..)), alpha=.2, fill="black") +
  xlim(-.04, .01) +
  ylab("Density") +
  xlab("Change in Predicted Probability") +
  ggtitle("Distribution of Effect of Refugee\nPresence on Incidence (H1)") +
  geom_vline(aes(xintercept=0)) +
  theme_dropcountry +
  annotate("point", x = plot.incidence.h1.sub[3,1], y = .025, size = 2) +
  annotate("segment", x = plot.incidence.h1.sub[3,4],
           xend = plot.incidence.h1.sub[3,3], y = .025, yend = .025) +
  annotate("text", x = -.025, y = .03, label = "Actual estimate")

# Violent Events (subset data) drop country plot
PLOT.attack.h1.sub.dropcountry.effect <- ggplot(attack.h1.sub.dropcountry.effect) + 
  geom_density(aes(x = estimate, y = ..density../sum(..density..)), alpha=.2, fill="black") +
  xlim(-1.5, 1) +
  ylab("Density") +
  xlab("Change in Predicted Number") +
  ggtitle("Distribution of Effect of Refugee\nPresence on Violent Events (H1)") +
  geom_vline(aes(xintercept=0)) +
  theme_dropcountry +
  annotate("point", x = plot.attack.h1.sub[1,2], y = .03, size = 2) +
  annotate("segment", x = plot.attack.h1.sub[1,6],
           xend = plot.attack.h1.sub[1,7], y = .03, yend = .03) +
  annotate("text", x = -.5, y = .04, label = "Actual estimate")

# Battle Deaths (subset data) drop country plot
PLOT.best.h1.sub.dropcountry.effect <- ggplot(best.h1.sub.dropcountry.effect) + 
  geom_density(aes(x = estimate, y = ..density../sum(..density..)), alpha=.2, fill="black") +
  xlim(-.2, .1) +
  ylab("Density") +
  xlab("Change in Predicted Number (logged)") +
  ggtitle("Distribution of Effect of Refugee\nPresence on Battle Deaths (H1)") +
  geom_vline(aes(xintercept=0)) +
  theme_dropcountry +
  annotate("point", x = plot.best.h1.sub[1,2], y = .03, size = 2) +
  annotate("segment", x = plot.best.h1.sub[1,6],
           xend = plot.best.h1.sub[1,7], y = .03, yend = .03) +
  annotate("text", x = -.1, y = .04, label = "Actual estimate")

# Plot: 2 x 2 grid (onset, incidence / violent events, battle deaths)
(PLOT.onset.h1.sub.dropcountry.effect + PLOT.incidence.h1.sub.dropcountry.effect) /
(PLOT.attack.h1.sub.dropcountry.effect + PLOT.best.h1.sub.dropcountry.effect)

@


\newpage
\subsubsection{Robustness of the \emph{conditional risk reduction effect} conditional on Concentrated Refugee Presence (H2a)}

Figure~\ref{fig:DropCountryH2} shows the distribution of the \emph{conditional risk reduction effect}, in the case of geographically concentrated refugee sites, for each conflict outcome after dropping one country at a time. Again, there are a total of \Sexpr{length(unique(geddatasub$Country))} countries in the data. Overall, these distributions, which concentrate around the actual estimates (including all countries), show that the \emph{conditional risk reduction effect} is not driven by any outlier country. 

<<DropCountryAnalysisH2_dontrun, eval=FALSE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Leave-one-country-out re-estimation of the H2a (interaction) models.
# For every country in geddatasub the four conflict models (onset, incidence,
# violent events, battle deaths) are refitted without that country. One row of
# each model's pred.bi.int() / pred.lm.int.r() output is kept, tagged with the
# dropped country, and the stacked rows are saved under Paper_Inputs/ for the
# plotting chunk. No pre-allocation is needed: the result tables are built
# with bind_rows() after the loop.

countries <- unique(geddatasub$Country) # countries present in the data

# Leave one core free but always register at least one worker: detectCores()
# may return 1 (which would request zero workers) or NA.
registerDoMC(max(1L, detectCores() - 1L, na.rm = TRUE))

# seq_along() gives an empty loop for an empty country list, whereas
# 1:length() would iterate over c(1, 0).
dropcountry_results <- foreach(i = seq_along(countries)) %dopar% {
  
  # data without the i-th country
  geddata.dropcountry <- geddatasub[geddatasub$Country != countries[i],]

  # Onset (logit, fitted on rows with year < 2009 only)
  onset.h2.sub.dropcountry <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry[geddata.dropcountry$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

  # Incidence (logit, fitted on rows with year < 2009 only)
  incidence.h2.sub.dropcountry <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry[geddata.dropcountry$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  # Violent events (linear model, all years)
  attack.h2.sub.dropcountry <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry)
  
  # Battle deaths (linear model on log(best + 1), all years)
  best.h2.sub.dropcountry <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropcountry)
  
  # Keep a single row of each helper's output: row 3 for the logit models and
  # row 1 for the linear models (presumably the main estimate that is plotted;
  # pred.bi.int() and pred.lm.int.r() are defined elsewhere -- confirm).
  onset.h2.sub.dropcountry.dplot <- pred.bi.int(onset.h2.sub.dropcountry)[3,]
  incidence.h2.sub.dropcountry.dplot <- pred.bi.int(incidence.h2.sub.dropcountry)[3,]
  attack.h2.sub.dropcountry.dplot <- pred.lm.int.r(attack.h2.sub.dropcountry)[1,]
  best.h2.sub.dropcountry.dplot <- pred.lm.int.r(best.h2.sub.dropcountry)[1,]
  
  # tag every row with the country that was left out
  onset.h2.sub.dropcountry.dplot$Country <- 
  incidence.h2.sub.dropcountry.dplot$Country <- 
  attack.h2.sub.dropcountry.dplot$Country <- 
  best.h2.sub.dropcountry.dplot$Country <- as.character(countries[i])
  
  # one list per dropped country: onset, incidence, attack, best (in that order)
  return(list(onset.h2.sub.dropcountry.dplot, incidence.h2.sub.dropcountry.dplot, 
              attack.h2.sub.dropcountry.dplot, best.h2.sub.dropcountry.dplot))
}

# stack the per-country rows into one table per outcome
onset.h2.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 1))
incidence.h2.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 2))
attack.h2.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 3))
best.h2.sub.dropcountry.effect <- bind_rows(lapply(dropcountry_results, `[[`, 4))

# save as Rdata
save(onset.h2.sub.dropcountry.effect, file = "Paper_Inputs/onset.h2.sub.dropcountry.effect.Rdata")
save(incidence.h2.sub.dropcountry.effect, file = "Paper_Inputs/incidence.h2.sub.dropcountry.effect.Rdata")
save(attack.h2.sub.dropcountry.effect, file = "Paper_Inputs/attack.h2.sub.dropcountry.effect.Rdata")
save(best.h2.sub.dropcountry.effect, file = "Paper_Inputs/best.h2.sub.dropcountry.effect.Rdata")

@

<<DropCountryH2, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap="This figure shows the distribution of the \\emph{conditional risk reduction effect}, in the case of geographically concentrated refugee sites within a country, after dropping one country at a time compared to the actual estimate (no dropped countries) with 95$\\%$ CIs. These estimates are grouped tightly around the actual estimates and do not substantively change the main results.">>=

# bring in data
# Leave-one-country-out H2a results, one .Rdata file per outcome (written by
# the DropCountryAnalysisH2_dontrun chunk).
load("Paper_Inputs/onset.h2.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/incidence.h2.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/attack.h2.sub.dropcountry.effect.Rdata")
load("Paper_Inputs/best.h2.sub.dropcountry.effect.Rdata")

# Objects used for the "Actual estimate" marker on the onset and incidence
# panels.
# NOTE(review): plot.attack.h2.sub and plot.best.h2.sub are also used by the
# plots in this chunk but are not loaded here -- presumably they are still in
# the workspace from an earlier chunk; confirm.
load("Paper_Inputs/plot.onset.h2.sub.Rdata")
load("Paper_Inputs/plot.incidence.h2.sub.Rdata")

# Onset (subset data): density of the H2a estimates obtained after dropping one
# country at a time. The point, horizontal bar and label mark the values read
# from row 3 of plot.onset.h2.sub (columns 1, 4 and 3).
PLOT.onset.h2.sub.dropcountry.effect <-
  ggplot(data = onset.h2.sub.dropcountry.effect) +
  xlim(-0.1, 0.01) +
  labs(x = "Change in Predicted Probability",
       y = "Density",
       title = "Distribution of Effect of Concentrated\nRefugee Presence on Onset (H2a)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8)) +
  geom_density(aes(x = Means, y = ..density.. / sum(..density..)),
               fill = "black", alpha = 0.2) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(aes(x = plot.onset.h2.sub[3, 1], y = 0.03), size = 2) +
  geom_segment(aes(x = plot.onset.h2.sub[3, 4],
                   xend = plot.onset.h2.sub[3, 3],
                   y = 0.03, yend = 0.03)) +
  geom_text(x = -0.075, y = 0.035, label = "Actual estimate")

# Incidence (subset data): density of the H2a estimates obtained after dropping
# one country at a time. The point, horizontal bar and label mark the values
# read from row 3 of plot.incidence.h2.sub (columns 1, 4 and 3).
PLOT.incidence.h2.sub.dropcountry.effect <-
  ggplot(data = incidence.h2.sub.dropcountry.effect) +
  xlim(-0.1, 0.01) +
  labs(x = "Change in Predicted Probability",
       y = "Density",
       title = "Distribution of Effect of Concentrated\nRefugee Presence on Incidence (H2a)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8)) +
  geom_density(aes(x = Means, y = ..density.. / sum(..density..)),
               fill = "black", alpha = 0.2) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(aes(x = plot.incidence.h2.sub[3, 1], y = 0.025), size = 2) +
  geom_segment(aes(x = plot.incidence.h2.sub[3, 4],
                   xend = plot.incidence.h2.sub[3, 3],
                   y = 0.025, yend = 0.025)) +
  geom_text(x = -0.04, y = 0.03, label = "Actual estimate")

# Violent events (subset data): density of the H2a estimates obtained after
# dropping one country at a time. The point, horizontal bar and label mark the
# values read from row 1 of plot.attack.h2.sub (columns 1, 3 and 4).
PLOT.attack.h2.sub.dropcountry.effect <-
  ggplot(data = attack.h2.sub.dropcountry.effect) +
  xlim(-3, 0.1) +
  labs(x = "Change in Predicted Number",
       y = "Density",
       title = "Distribution of Effect of Concentrated\nRefugee Presence on Violent Events (H2a)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8)) +
  geom_density(aes(x = Means, y = ..density.. / sum(..density..)),
               fill = "black", alpha = 0.2) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(aes(x = plot.attack.h2.sub[1, 1], y = 0.025), size = 2) +
  geom_segment(aes(x = plot.attack.h2.sub[1, 3],
                   xend = plot.attack.h2.sub[1, 4],
                   y = 0.025, yend = 0.025)) +
  geom_text(x = -0.5, y = 0.03, label = "Actual estimate")

# Battle deaths (subset data): density of the H2a estimates obtained after
# dropping one country at a time. The point, horizontal bar and label mark the
# values read from row 1 of plot.best.h2.sub (columns 1, 3 and 4).
PLOT.best.h2.sub.dropcountry.effect <-
  ggplot(data = best.h2.sub.dropcountry.effect) +
  xlim(-0.4, 0.05) +
  labs(x = "Change in Predicted Number (logged)",
       y = "Density",
       title = "Distribution of Effect of Concentrated\nRefugee Presence on Battle Deaths (H2a)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8)) +
  geom_density(aes(x = Means, y = ..density.. / sum(..density..)),
               fill = "black", alpha = 0.2) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(aes(x = plot.best.h2.sub[1, 1], y = 0.03), size = 2) +
  geom_segment(aes(x = plot.best.h2.sub[1, 3],
                   xend = plot.best.h2.sub[1, 4],
                   y = 0.03, yend = 0.03)) +
  geom_text(x = -0.25, y = 0.038, label = "Actual estimate")

# Assemble the 2 x 2 figure: onset and incidence on the top row, violent events
# and battle deaths on the bottom row. The last expression is what gets drawn.
dropcountry.h2.toprow <- PLOT.onset.h2.sub.dropcountry.effect +
  PLOT.incidence.h2.sub.dropcountry.effect
dropcountry.h2.bottomrow <- PLOT.attack.h2.sub.dropcountry.effect +
  PLOT.best.h2.sub.dropcountry.effect
dropcountry.h2.toprow / dropcountry.h2.bottomrow

@


%% Dropping One Region at a Time
\newpage
\subsection{Drop One Region at a Time}
\label{SIsec:DropRegionAnalysis}

\subsubsection{Robustness of the Null Effect of Refugee Presence (H1)}

Figure~\ref{fig:DropRegionH1} shows a bar plot of the number of provinces dropped per region, together with the estimated effect of refugee presence once each region is dropped. Overall, these estimates are consistent with the main result of a null effect of the presence of refugee sites on conflict, confirming that our findings are not driven by any particular region. 

<<DropRegionAnalysisH1_dontrun, eval=FALSE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Leave-one-region-out re-estimation of the H1 models.
# For every region in geddatasub the four conflict models (onset, incidence,
# violent events, battle deaths) are refitted without that region. For the
# logit models row 3 of pred.bi() is kept; for the linear models the "rtb"
# coefficient row of tidy() is kept. Rows are tagged with the dropped region,
# stacked per outcome, given a fixed region order and saved under Paper_Inputs/
# for the plotting chunk. No pre-allocation is needed: the result tables are
# built with bind_rows() after the loop.

regions <- unique(geddatasub$REGION) # regions present in the data

# Leave one core free but always register at least one worker: detectCores()
# may return 1 (which would request zero workers) or NA.
registerDoMC(max(1L, detectCores() - 1L, na.rm = TRUE))

# seq_along() gives an empty loop for an empty region list, whereas
# 1:length() would iterate over c(1, 0).
dropregion_results <- foreach(i = seq_along(regions)) %dopar% {
  
  # data without the i-th region
  geddata.dropregion <- geddatasub[geddatasub$REGION != regions[i],]

  # run the main conflict models
  
  # Onset (logit, fitted on rows with year < 2009 only)
  onset.h1.sub.dropregion <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion[geddata.dropregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

  # Incidence (logit, fitted on rows with year < 2009 only)
  incidence.h1.sub.dropregion <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion[geddata.dropregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  # Violent events (linear model, all years)
  attack.h1.sub.dropregion <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion)
  
  # Battle deaths (linear model on log(best + 1), all years)
  best.h1.sub.dropregion <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = geddata.dropregion)
  
  # Logit models: keep row 3 of the pred.bi() output (presumably the main
  # estimate that is plotted; pred.bi() is defined elsewhere -- confirm).
  onset.h1.sub.dropregion.dplot <- pred.bi(onset.h1.sub.dropregion)[3,]
  incidence.h1.sub.dropregion.dplot <- pred.bi(incidence.h1.sub.dropregion)[3,]

  # Linear models: keep the "rtb" coefficient row. The row is selected with the
  # tidy table's own `term` column so the filter cannot drift out of step with
  # the rows it is applied to.
  attack.tidy <- tidy(attack.h1.sub.dropregion)
  best.tidy <- tidy(best.h1.sub.dropregion)
  attack.h1.sub.dropregion.dplot <- attack.tidy[attack.tidy$term == "rtb",] 
  best.h1.sub.dropregion.dplot <- best.tidy[best.tidy$term == "rtb",] 
  
  # tag every row with the region that was left out
  onset.h1.sub.dropregion.dplot$Region <- 
  incidence.h1.sub.dropregion.dplot$Region <- 
  attack.h1.sub.dropregion.dplot$Region <- 
  best.h1.sub.dropregion.dplot$Region <- as.character(regions[i])
  
  # one list per dropped region: onset, incidence, attack, best (in that order)
  return(list(onset.h1.sub.dropregion.dplot, incidence.h1.sub.dropregion.dplot, 
              attack.h1.sub.dropregion.dplot, best.h1.sub.dropregion.dplot))
}

# stack the per-region rows into one table per outcome
onset.h1.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 1))
incidence.h1.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 2))
attack.h1.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 3))
best.h1.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 4))

# reorder Regions: factor levels follow this fixed order (grouped by continent)
regionorder <- c("Southern Europe", "Eastern Europe", 
                 "Melanesia", "Southeastern Asia", "Southern Asia", "Western Asia", "Central Asia", 
                 "South America", "Central America", 
                 "Southern Africa", "Western Africa", "Northern Africa", "Middle Africa", "Eastern Africa"
                  )

onset.h1.sub.dropregion.effect$Region <- fct_relevel(onset.h1.sub.dropregion.effect$Region, regionorder)
incidence.h1.sub.dropregion.effect$Region <- fct_relevel(incidence.h1.sub.dropregion.effect$Region, regionorder)
attack.h1.sub.dropregion.effect$Region <- fct_relevel(attack.h1.sub.dropregion.effect$Region, regionorder)
best.h1.sub.dropregion.effect$Region <- fct_relevel(best.h1.sub.dropregion.effect$Region, regionorder)
  
# save as Rdata
save(onset.h1.sub.dropregion.effect, file = "Paper_Inputs/onset.h1.sub.dropregion.effect.Rdata")
save(incidence.h1.sub.dropregion.effect, file = "Paper_Inputs/incidence.h1.sub.dropregion.effect.Rdata")
save(attack.h1.sub.dropregion.effect, file = "Paper_Inputs/attack.h1.sub.dropregion.effect.Rdata")
save(best.h1.sub.dropregion.effect, file = "Paper_Inputs/best.h1.sub.dropregion.effect.Rdata")

@

<<DropRegionH1, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 13, fig.height = 7, out.width= "1.05\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap="This figure shows the barplot of number of provinces dropped per region, and the effect of the presence of refugee sites on conflict outcomes once each region is dropped, with 95$\\%$ CIs.">>=

# bring in data
# Leave-one-region-out H1 results, one .Rdata file per outcome (written by the
# DropRegionAnalysisH1_dontrun chunk).
load("Paper_Inputs/onset.h1.sub.dropregion.effect.Rdata")
load("Paper_Inputs/incidence.h1.sub.dropregion.effect.Rdata")
load("Paper_Inputs/attack.h1.sub.dropregion.effect.Rdata")
load("Paper_Inputs/best.h1.sub.dropregion.effect.Rdata")

# Display order of the regions (grouped by continent).
regionorder <- c("Southern Europe", "Eastern Europe", 
                 "Melanesia", "Southeastern Asia", "Southern Asia", "Western Asia", "Central Asia", 
                 "South America", "Central America", 
                 "Southern Africa", "Western Africa", "Northern Africa", "Middle Africa", "Eastern Africa"
                  )

# Rows of geddatasub per region divided by the number of distinct years, i.e.
# the number of provinces per region when every province has one row per year.
regiondropN <- tapply(geddatasub$GMI_ADMIN, geddatasub$REGION, length) /
  length(unique(geddatasub$year))

# One row per region, with the regions put in display order.
regiondropNdat <- data.frame(N = as.numeric(regiondropN),
                             Region = names(regiondropN))
regiondropNdat$Region <- fct_relevel(regiondropNdat$Region, regionorder)

# Horizontal bar chart of the number of provinces per region.
RegionDropBar <-
  ggplot(data = regiondropNdat, mapping = aes(x = Region, y = N)) +
  labs(x = "Dropped Region",
       y = "Number of Provinces",
       title = "Number of Provinces\nper Region") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8)) +
  coord_flip() +
  geom_bar(stat = "identity", width = 0.8, fill = "white", colour = "black")


# Onset (subset data): H1 estimate (Means) with its CIL-CIU interval for each
# dropped region, drawn horizontally. Region labels, the region-axis title and
# all ticks are suppressed.
PLOT.onset.h1.sub.dropregion.effect <-
  ggplot(data = onset.h1.sub.dropregion.effect,
         mapping = aes(x = Region, y = Means)) +
  ylim(-0.05, 0.025) +
  labs(y = "Change in\nPredicted Probability",
       title = "Effect of Refugee Presence\non Onset (H1)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  coord_flip() +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = 0.5) +
  geom_hline(aes(yintercept = 0))

# Incidence (subset data): H1 estimate (Means) with its CIL-CIU interval for
# each dropped region, drawn horizontally. Region labels, the region-axis title
# and all ticks are suppressed.
PLOT.incidence.h1.sub.dropregion.effect <-
  ggplot(data = incidence.h1.sub.dropregion.effect,
         mapping = aes(x = Region, y = Means)) +
  ylim(-0.05, 0.025) +
  labs(y = "Change in\nPredicted Probability",
       title = "Effect of Refugee Presence\non Incidence (H1)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  coord_flip() +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = 0.5) +
  geom_hline(aes(yintercept = 0))

# Violent events (subset data): H1 coefficient (estimate) with its
# conf.low-conf.high interval for each dropped region, drawn horizontally.
# Region labels, the region-axis title and all ticks are suppressed.
PLOT.attack.h1.sub.dropregion.effect <-
  ggplot(data = attack.h1.sub.dropregion.effect,
         mapping = aes(x = Region, y = estimate)) +
  ylim(-2, 2) +
  labs(y = "Change in\nPredicted Number",
       title = "Effect of Refugee Presence\non Violent Events (H1)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  coord_flip() +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                    ymax = as.numeric(as.character(conf.high))),
                width = 0, size = 0.5) +
  geom_hline(aes(yintercept = 0))

# Battle deaths (subset data): H1 coefficient (estimate) with its
# conf.low-conf.high interval for each dropped region, drawn horizontally.
# Region labels, the region-axis title and all ticks are suppressed.
PLOT.best.h1.sub.dropregion.effect <-
  ggplot(data = best.h1.sub.dropregion.effect,
         mapping = aes(x = Region, y = estimate)) +
  ylim(-0.15, 0.1) +
  labs(y = "Change in\nPredicted Number (logged)",
       title = "Effect of Refugee Presence\non Battle deaths (H1)") +
  theme(legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_rect(fill = NA, colour = "gray", size = 0.8),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank()) +
  coord_flip() +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                    ymax = as.numeric(as.character(conf.high))),
                width = 0, size = 0.5) +
  geom_hline(aes(yintercept = 0))

# Put the bar chart and the four effect panels side by side in a single row.
# The last expression is what gets drawn.
dropregion.h1.panels <- RegionDropBar +
  PLOT.onset.h1.sub.dropregion.effect +
  PLOT.incidence.h1.sub.dropregion.effect +
  PLOT.attack.h1.sub.dropregion.effect +
  PLOT.best.h1.sub.dropregion.effect
dropregion.h1.panels + plot_layout(nrow = 1)

@

\newpage
\subsubsection{Robustness of the \emph{conditional risk reduction effect} conditional on Concentrated Refugee Presence (H2a)}

Figure~\ref{fig:DropRegionH2} shows a bar plot of the number of provinces dropped per region and the \emph{conditional risk reduction effect}, in the case of geographically concentrated refugee sites, for conflict outcomes after dropping one region at a time. All estimates remain negative and, aside from incidence after dropping Northern Africa, statistically significant. This analysis confirms that our findings are not driven by any particular region. 

<<DropRegionAnalysisH2_dontrun, eval=FALSE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# Leave-one-region-out re-estimation of the H2a (interaction) models.
# For every region in geddatasub the four conflict models (onset, incidence,
# violent events, battle deaths) are refitted without that region. One row of
# each model's pred.bi.int() / pred.lm.int.r() output is kept, tagged with the
# dropped region, stacked per outcome, given a fixed region order and saved
# under Paper_Inputs/ for the plotting chunk. No pre-allocation is needed: the
# result tables are built with bind_rows() after the loop.

regions <- unique(geddatasub$REGION) # regions present in the data

# Leave one core free but always register at least one worker: detectCores()
# may return 1 (which would request zero workers) or NA.
registerDoMC(max(1L, detectCores() - 1L, na.rm = TRUE))

# seq_along() gives an empty loop for an empty region list, whereas
# 1:length() would iterate over c(1, 0).
dropregion_results <- foreach(i = seq_along(regions)) %dopar% {
  
  # data without the i-th region
  geddata.dropregion <- geddatasub[geddatasub$REGION != regions[i],]

  # run the main conflict models
  
  # Onset (logit, fitted on rows with year < 2009 only)
  onset.h2.sub.dropregion <- bayesglm(onset.n ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion[geddata.dropregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

  # Incidence (logit, fitted on rows with year < 2009 only)
  incidence.h2.sub.dropregion <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion[geddata.dropregion$year < 2009,],
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 
  
  # Violent events (linear model, all years)
  attack.h2.sub.dropregion <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion)
  
  # Battle deaths (linear model on log(best + 1), all years)
  best.h2.sub.dropregion <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddata.dropregion)
  
  # Keep a single row of each helper's output: row 3 for the logit models and
  # row 1 for the linear models (presumably the main estimate that is plotted;
  # pred.bi.int() and pred.lm.int.r() are defined elsewhere -- confirm).
  onset.h2.sub.dropregion.dplot <- pred.bi.int(onset.h2.sub.dropregion)[3,]
  incidence.h2.sub.dropregion.dplot <- pred.bi.int(incidence.h2.sub.dropregion)[3,]
  attack.h2.sub.dropregion.dplot <- pred.lm.int.r(attack.h2.sub.dropregion)[1,] 
  best.h2.sub.dropregion.dplot <- pred.lm.int.r(best.h2.sub.dropregion)[1,] 
  
  # tag every row with the region that was left out
  onset.h2.sub.dropregion.dplot$Region <- 
  incidence.h2.sub.dropregion.dplot$Region <- 
  attack.h2.sub.dropregion.dplot$Region <- 
  best.h2.sub.dropregion.dplot$Region <- as.character(regions[i])
  
  # one list per dropped region: onset, incidence, attack, best (in that order)
  return(list(onset.h2.sub.dropregion.dplot, incidence.h2.sub.dropregion.dplot, 
              attack.h2.sub.dropregion.dplot, best.h2.sub.dropregion.dplot))
}

# stack the per-region rows into one table per outcome
onset.h2.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 1))
incidence.h2.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 2))
attack.h2.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 3))
best.h2.sub.dropregion.effect <- bind_rows(lapply(dropregion_results, `[[`, 4))

# reorder Regions: factor levels follow this fixed order (grouped by continent)
regionorder <- c("Southern Europe", "Eastern Europe", 
                 "Melanesia", "Southeastern Asia", "Southern Asia", "Western Asia", "Central Asia", 
                 "South America", "Central America", 
                 "Southern Africa", "Western Africa", "Northern Africa", "Middle Africa", "Eastern Africa"
                  )

onset.h2.sub.dropregion.effect$Region <- fct_relevel(onset.h2.sub.dropregion.effect$Region, regionorder)
incidence.h2.sub.dropregion.effect$Region <- fct_relevel(incidence.h2.sub.dropregion.effect$Region, regionorder)
attack.h2.sub.dropregion.effect$Region <- fct_relevel(attack.h2.sub.dropregion.effect$Region, regionorder)
best.h2.sub.dropregion.effect$Region <- fct_relevel(best.h2.sub.dropregion.effect$Region, regionorder)
  
# save as Rdata
save(onset.h2.sub.dropregion.effect, file = "Paper_Inputs/onset.h2.sub.dropregion.effect.Rdata")
save(incidence.h2.sub.dropregion.effect, file = "Paper_Inputs/incidence.h2.sub.dropregion.effect.Rdata")
save(attack.h2.sub.dropregion.effect, file = "Paper_Inputs/attack.h2.sub.dropregion.effect.Rdata")
save(best.h2.sub.dropregion.effect, file = "Paper_Inputs/best.h2.sub.dropregion.effect.Rdata")

@

<<DropRegionH2, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 13, fig.height = 7, out.width= "1.05\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap="This figure shows the barplot of number of provinces dropped per region, and the \\emph{conditional risk reduction effect} once each region is dropped, with 95$\\%$ CIs.">>=

# bring in data
# Leave-one-region-out H2a results, one .Rdata file per outcome (written by
# the DropRegionAnalysisH2_dontrun chunk).
load("Paper_Inputs/onset.h2.sub.dropregion.effect.Rdata")
load("Paper_Inputs/incidence.h2.sub.dropregion.effect.Rdata")
load("Paper_Inputs/attack.h2.sub.dropregion.effect.Rdata")
load("Paper_Inputs/best.h2.sub.dropregion.effect.Rdata")

#count number of province-years in each region
regiondropN <- tapply(geddatasub$GMI_ADMIN, geddatasub$REGION, length)/length(unique(geddatasub$year)) 

regiondropNdat <- data.frame(N = as.numeric(regiondropN), Region = names(regiondropN))

regionorder <- c("Southern Europe", "Eastern Europe", 
                 "Melanesia", "Southeastern Asia", "Southern Asia", "Western Asia", "Central Asia", 
                 "South America", "Central America", 
                 "Southern Africa", "Western Africa", "Northern Africa", "Middle Africa", "Eastern Africa"
                  )

regiondropNdat$Region <- fct_relevel(regiondropNdat$Region, regionorder)

RegionDropBar <- ggplot(regiondropNdat, 
       aes(x = Region, y = N
                        )) + 
  geom_bar(colour="black", fill="white", width=.8, stat="identity") +
  ylab("Number of Provinces") +
  xlab("Dropped Region") +
  ggtitle("Number of Provinces\nper Region") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        ) +
  coord_flip()


# Onset (subset data) regional plot
PLOT.onset.h2.sub.dropregion.effect <- ggplot(onset.h2.sub.dropregion.effect, 
       aes(x = Region, y = Means
                        )) + 
  geom_point(size=2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .5) +
  geom_hline(aes(yintercept=0)) +
  ylim(-.11,.01) +
  ylab("Change in\nPredicted Probability") +
  ggtitle("Effect of Concentrated Refugee\nPresence on Onset (H2a)") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 11) 
        ) +
  coord_flip()

# Incidence (subset data) regional plot
PLOT.incidence.h2.sub.dropregion.effect <- ggplot(incidence.h2.sub.dropregion.effect, 
       aes(x = Region, y = Means
                        )) + 
  geom_point(size=2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .5) +
  geom_hline(aes(yintercept=0)) +
  ylim(-.11,.01) +
  ylab("Change in\nPredicted Probability") +
  ggtitle("Effect of Concentrated Refugee\nPresence on Incidence (H2a)") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 11) 
        ) +
  coord_flip()

# Violent events (subset data) regional plot
PLOT.attack.h2.sub.dropregion.effect <- ggplot(attack.h2.sub.dropregion.effect, 
       aes(x = Region, y = Means
                        )) + 
  geom_point(size=2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .5) +
  geom_hline(aes(yintercept=0)) +
  ylim(-4,.5) +
  ylab("Change in\nPredicted Number") +
  ggtitle("Effect of Concentrated Refugee\nPresence on Violent Events (H2a)") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 11) 
        ) +
  coord_flip()

# Battle deaths (subset data) regional plot
PLOT.best.h2.sub.dropregion.effect <- ggplot(best.h2.sub.dropregion.effect, 
       aes(x = Region, y = Means
                        )) + 
  geom_point(size=2) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .5) +
  geom_hline(aes(yintercept=0)) +
  ylim(-.35,.05) +
  ylab("Change in\nPredicted Number (logged)") +
  ggtitle("Effect of Concentrated Refugee\nPresence on Battle deaths (H2a)") +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 11) 
        ) +
  coord_flip()

# Plot
(RegionDropBar + PLOT.onset.h2.sub.dropregion.effect + PLOT.incidence.h2.sub.dropregion.effect + PLOT.attack.h2.sub.dropregion.effect + PLOT.best.h2.sub.dropregion.effect) + plot_layout(nrow = 1)

@


%% ANALYSIS WITH FULL DATASET OF ALL COUNTRIES
\newpage
\subsection{Analysis using Full Data of All Countries}
\label{SIsec:Full}

In this section, we rerun the main (H1) and secondary (H2) analyses in the paper using the full dataset of all countries, as opposed to subsetting to only those countries that hosted refugees at some point during the study period. While we do not believe that including all countries, even those that never hosted refugees, provides the correct counterfactual, we perform this analysis for completeness and as a robustness check. Results do not substantively change.

% PRED PROBS FIGURE FOR H1: Full Data
<<MainModelsH1_full, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 3, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH1_sub} with the full dataset of all countries.")>>=
 
# Bring in the cached H1 onset/incidence estimates for the full dataset.
# The files are addressed by their path relative to the document directory
# rather than via setwd(): changing the working directory inside a knitr chunk
# is discouraged, and a second interactive run of setwd("Paper_Inputs") fails.
load("Paper_Inputs/plot.onset.h1.full.Rdata")
load("Paper_Inputs/plot.incidence.h1.full.Rdata")

# Tag each table with the outcome it belongs to (used as the x variable)
plot.onset.h1.full$outcome <- "onset"
plot.incidence.h1.full$outcome <- "incidence"

# Dodge the two groups horizontally (dodge width 0.5)
pd <- position_dodge(0.5)

# Onset and incidence: rows with Plot == 3 from each table, "onset" first
PLOT.onsetincidence.h1.full <- bind_rows(plot.onset.h1.full[plot.onset.h1.full$Plot == 3,],
          plot.incidence.h1.full[plot.incidence.h1.full$Plot ==3,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>%
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.05, .05)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = .8),
        legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white")
        )

# Builds one legend-free panel for a count-type outcome.
#   estimates    : data with columns outcome, estimate, conf.low, conf.high, Group
#   y.range      : length-2 numeric, limits of the y axis
#   y.label      : y-axis label
#   x.tick.label : label shown for the single outcome on the x axis
h1.full.count.panel <- function(estimates, y.range, y.label, x.tick.label) {
  ggplot(estimates, aes(x = outcome, y = estimate,
                        group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.range[1], y.range[2]) +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = c(x.tick.label)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)
          )
}

# NOTE(review): plot.attack.h1.full and plot.best.h1.full are not loaded in
# this chunk, so they must already exist in the session (presumably built by
# an earlier chunk, with an `outcome` column) -- confirm.
PLOT.attack.h1.full <- h1.full.count.panel(plot.attack.h1.full,
                                           y.range = c(-1.5, 1.5),
                                           y.label = "Change in Predicted Number",
                                           x.tick.label = "Violent Events")

PLOT.best.h1.full <- h1.full.count.panel(plot.best.h1.full,
                                         y.range = c(-.25, .25),
                                         y.label = "Change in Predicted Number (logged)",
                                         x.tick.label = "Battle Deaths")

# Combine: probability panel (carrying the figure title) next to the two
# count panels; the hjust value shifts the title horizontally
(PLOT.onsetincidence.h1.full + ggtitle('Effect of Refugee Presence on Conflict Outcomes (H1)') +
  theme(plot.title = element_text(hjust= 1.08))) +
  (PLOT.attack.h1.full + PLOT.best.h1.full)

@

% PRED PROBS FIGURE FOR H2: Full Data
<<MainModelsH2_full, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 6, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH2_sub} with the full dataset of all countries.")>>=

# Bring in the cached H2 onset/incidence estimates for the full dataset.
# The files are addressed by their path relative to the document directory
# rather than via setwd(): changing the working directory inside a knitr chunk
# is discouraged, and a second interactive run of setwd("Paper_Inputs") fails.
load("Paper_Inputs/plot.onset.h2.full.Rdata")
load("Paper_Inputs/plot.incidence.h2.full.Rdata")

# Tag each table with the outcome it belongs to (used as the x variable).
# NOTE(review): plot.attack.h2.full and plot.best.h2.full are not loaded in
# this chunk, so they must already exist in the session -- confirm.
plot.onset.h2.full$outcome <- "onset"
plot.incidence.h2.full$outcome <- "incidence"
plot.attack.h2.full$outcome <- "attack"
plot.best.h2.full$outcome <- "log(best + 1)"

# Dodge the two groups horizontally (dodge width 0.5)
pd <- position_dodge(0.5)

# Builds the onset/incidence probability panel from the rows of both tables
# whose Plot column equals plot.id.
#   plot.id      : value of the Plot column to keep
#   legend.theme : theme() object holding the legend settings for this panel
h2.full.prob.panel <- function(plot.id, legend.theme) {
  bind_rows(plot.onset.h2.full[plot.onset.h2.full$Plot == plot.id, ],
            plot.incidence.h2.full[plot.incidence.h2.full$Plot == plot.id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset")) %>%
    ggplot(aes(x = outcome, y = Means,
               group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylab("Change in Predicted Probability") +
    xlab("") +
    scale_x_discrete(labels = c("Onset",
                                "Incidence")) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    theme(panel.background = element_blank(),
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)) +
    legend.theme
}

# Builds one legend-free panel for a count-type outcome.
#   estimates    : data with columns outcome, Means, CIL, CIU, Group
#   y.range      : length-2 numeric, limits of the y axis
#   y.label      : y-axis label
#   x.tick.label : label shown for the single outcome on the x axis
h2.full.count.panel <- function(estimates, y.range, y.label, x.tick.label) {
  ggplot(estimates, aes(x = outcome, y = Means,
                        group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.range[1], y.range[2]) +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = c(x.tick.label)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)
          )
}

# H2a probability panel (Plot == 3); the only panel that shows the legend,
# anchored in its top-right corner
PLOT.onsetincidence.h2a.full <- h2.full.prob.panel(
  3,
  theme(legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white"))
)

# H2b probability panel (Plot == 6), no legend
PLOT.onsetincidence.h2b.full <- h2.full.prob.panel(
  6,
  theme(legend.position = "none")
)

# Violent events: rows 1 and 3 feed the H2a panel, rows 2 and 4 the H2b panel
PLOT.attack.h2a.full <- h2.full.count.panel(plot.attack.h2.full[c(1,3),],
                                            y.range = c(-3, 3),
                                            y.label = "Change in Predicted Number",
                                            x.tick.label = "Violent Events")

PLOT.attack.h2b.full <- h2.full.count.panel(plot.attack.h2.full[c(2,4),],
                                            y.range = c(-3, 3),
                                            y.label = "Change in Predicted Number",
                                            x.tick.label = "Violent Events")

# Battle deaths: same row split as for violent events
PLOT.best.h2a.full <- h2.full.count.panel(plot.best.h2.full[c(1,3),],
                                          y.range = c(-.5, .5),
                                          y.label = "Change in Predicted Number (logged)",
                                          x.tick.label = "Battle Deaths")

PLOT.best.h2b.full <- h2.full.count.panel(plot.best.h2.full[c(2,4),],
                                          y.range = c(-.5, .5),
                                          y.label = "Change in Predicted Number (logged)",
                                          x.tick.label = "Battle Deaths")

# One row per hypothesis; the row title is attached to the probability panel
# and shifted horizontally through its hjust value
patch1 <- (PLOT.onsetincidence.h2a.full + ggtitle("Effect of Concentrated Refugee Presence (H2a)") +
  theme(plot.title = element_text(hjust= 13))) +
  (PLOT.attack.h2a.full + PLOT.best.h2a.full)

patch2 <- (PLOT.onsetincidence.h2b.full + ggtitle("Effect of Dispersed Refugee Presence (H2b)") +
  theme(plot.title = element_text(hjust= -2.9))) +
  (PLOT.attack.h2b.full + PLOT.best.h2b.full)

# Stack the H2a row above the H2b row
patch1/patch2

@


%% DYNAMIC SUBSET DATA
\newpage
\subsection{Using the Dynamic Subset Data of only Refugee-hosting Countries}
\label{SIsec:DynamicSubsetAnalysis}

This section reruns the main analyses using a modified subset of the data that contains only countries that have hosted refugee sites. Unlike the original data, which include all countries that hosted refugees at \textit{any time} during our study period (1990--2008), \textit{this version} is subset dynamically to address possible concerns of collider bias, which can emerge when an analysis at a certain time conditions on variables in the future \citep{Morgan:2014}. In our case, this means that at time $t$ the original data include not only countries that have hosted refugees by time $t$ but also countries that will host refugees only after time $t$. To address this possible concern, here we create a \textit{dynamic subset} in which, at any year $t$, we only include the subset of countries that have \textit{already} hosted refugees in the years preceding $t$, so our variation comes from provinces within these already-hosting countries that have not or have not yet hosted a refugee site. However, using this dynamic subset greatly decreases our number of observations from \numprint{\Sexpr{nrow(geddatasub)}} province-years in the main data to \numprint{\Sexpr{nrow(geddatasub2)}} province-years. Results do not substantively change.

%Figures \ref{fig:DynamicSubsetH1} and \ref{fig:DynamicSubsetH2} for the main and secondary re-analyses, respectively, confirm that refugee sites have null effects on conflict onset and incidence. In the secondary analysis, the negative point estimate for the \emph{conditional risk reduction effect} remains, and retains its statistical significance for conflict onset. 

<<DynamicSubsetOnsetModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

# In case of perfect separation (too many countries with no conflict in the
# full dataset), uninformative priors are chosen based on the stan-dev
# Prior Choice wiki: https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

## Treatment model 3 on the dynamic subset (geddatasub2), years before 2009.
## excluded_mean is commented out of the covariate set, and the
## prior.scale = 1, prior.df = 3 override is likewise disabled.
onset.h1.sub2 <- bayesglm(
  onset.n ~ rtb + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub2[geddatasub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub2)

## Placebo model: rtb.placebo replaces rtb, fitted on plagedrtbsub2
onset.h1.sub2.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + onset.n_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub2[plagedrtbsub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub2.pla)

# Run pred.bi() on each fit and label the result by estimate type
onset.h1.sub2.dplot <- pred.bi(onset.h1.sub2)
onset.h1.sub2.dplot$Group <- "Actual presence"

onset.h1.sub2.pla.dplot <- pred.bi(onset.h1.sub2.pla)
onset.h1.sub2.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates into one table for ggplot
plot.onset.h1.sub2 <- rbind(onset.h1.sub2.dplot,
                            onset.h1.sub2.pla.dplot)

# Saving the output as Rdata files is currently disabled
# save(onset.h1.sub2, file = "onset.h1.sub2.Rdata")
# save(onset.h1.sub2.pla, file = "onset.h1.sub2.pla.Rdata")
# save(plot.onset.h1.sub2, file = "plot.onset.h1.sub2.Rdata")

@

<<DynamicSubsetIncidenceModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the dynamic subset (geddatasub2), years before 2009.
## excluded_mean is commented out of the covariate set, and the
## prior.scale = 1, prior.df = 3 override is likewise disabled.
incidence.h1.sub2 <- bayesglm(
  incidence ~ rtb + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub2[geddatasub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub2)

## Placebo model: rtb.placebo replaces rtb, fitted on plagedrtbsub2
incidence.h1.sub2.pla <- bayesglm(
  incidence ~ rtb.placebo + rtb.other + incidence_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub2[plagedrtbsub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub2.pla)

# Run pred.bi() on each fit and label the result by estimate type
incidence.h1.sub2.dplot <- pred.bi(incidence.h1.sub2)
incidence.h1.sub2.dplot$Group <- "Actual presence"

incidence.h1.sub2.pla.dplot <- pred.bi(incidence.h1.sub2.pla)
incidence.h1.sub2.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates into one table for ggplot
plot.incidence.h1.sub2 <- rbind(incidence.h1.sub2.dplot,
                                incidence.h1.sub2.pla.dplot)

# Saving the output as Rdata files is currently disabled
# save(incidence.h1.sub2, file = "incidence.h1.sub2.Rdata")
# save(incidence.h1.sub2.pla, file = "incidence.h1.sub2.pla.Rdata")
# save(plot.incidence.h1.sub2, file = "plot.incidence.h1.sub2.Rdata")

@

<<DynamicSubsetMainAttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the dynamic subset: lm_robust() fit of attack on all
## rows of geddatasub2 (no year filter is applied in this chunk).
## excluded_mean is commented out of the covariate set.
attack.h1.sub2 <- lm_robust(
  attack ~ rtb + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub2
)

# summary(attack.h1.sub2)

## Placebo model: rtb.placebo replaces rtb, fitted on plagedrtbsub2
attack.h1.sub2.pla <- lm_robust(
  attack ~ rtb.placebo + rtb.other + attack_1 + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub2
)

# summary(attack.h1.sub2.pla)

# Keep only the treatment coefficient row of the tidied fit (rows are selected
# by matching the model's own term vector) and label the estimate type
attack.h1.sub2.dplot <- tidy(attack.h1.sub2)[attack.h1.sub2$term == "rtb", ]
attack.h1.sub2.dplot$Group <- "Actual presence"

# Same for the placebo coefficient
attack.h1.sub2.pla.dplot <- tidy(attack.h1.sub2.pla)[attack.h1.sub2.pla$term == "rtb.placebo", ]
attack.h1.sub2.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates into one table for ggplot
plot.attack.h1.sub2 <- rbind(attack.h1.sub2.dplot,
                             attack.h1.sub2.pla.dplot)

@

<<DynamicSubsetMainBattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the dynamic subset: lm_robust() fit of log(best + 1)
## on all rows of geddatasub2 (no year filter is applied in this chunk).
## excluded_mean is commented out of the covariate set.
best.h1.sub2 <- lm_robust(
  log(best + 1) ~ rtb + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub2
)

# summary(best.h1.sub2)

## Placebo model: rtb.placebo replaces rtb, fitted on plagedrtbsub2
best.h1.sub2.pla <- lm_robust(
  log(best + 1) ~ rtb.placebo + rtb.other + log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub2
)

# summary(best.h1.sub2.pla)

# Keep only the treatment coefficient row of the tidied fit (rows are selected
# by matching the model's own term vector) and label the estimate type
best.h1.sub2.dplot <- tidy(best.h1.sub2)[best.h1.sub2$term == "rtb", ]
best.h1.sub2.dplot$Group <- "Actual presence"

# Same for the placebo coefficient
best.h1.sub2.pla.dplot <- tidy(best.h1.sub2.pla)[best.h1.sub2.pla$term == "rtb.placebo", ]
best.h1.sub2.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates into one table for ggplot
plot.best.h1.sub2 <- rbind(best.h1.sub2.dplot,
                           best.h1.sub2.pla.dplot)

@

% PRED PROBS FIGURE FOR H1: DYNAMIC SUBSET
<<DynamicSubsetModelsH1_sub2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos='H', fig.width = 9, fig.height = 3, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH1_sub} with the dynamic subset data.")>>=
 
# Bring in the cached H1 onset/incidence estimates for the dynamic subset.
# The files are addressed by their path relative to the document directory
# rather than via setwd(): changing the working directory inside a knitr chunk
# is discouraged, and a second interactive run of setwd("Paper_Inputs") fails.
# NOTE(review): these loads replace any in-memory plot.onset.h1.sub2 /
# plot.incidence.h1.sub2 objects with the versions stored on disk -- confirm
# the cached files are up to date with the current model specification.
load("Paper_Inputs/plot.onset.h1.sub2.Rdata")
load("Paper_Inputs/plot.incidence.h1.sub2.Rdata")

# Tag each table with the outcome it belongs to (used as the x variable)
plot.onset.h1.sub2$outcome <- "onset"
plot.incidence.h1.sub2$outcome <- "incidence"

# Dodge the two groups horizontally (dodge width 0.5)
pd <- position_dodge(0.5)

# Onset and incidence: rows with Plot == 3 from each table, "onset" first
PLOT.onsetincidence.h1.sub2 <- bind_rows(plot.onset.h1.sub2[plot.onset.h1.sub2$Plot == 3,],
          plot.incidence.h1.sub2[plot.incidence.h1.sub2$Plot == 3,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>%
  ggplot(aes(x = outcome, y = Means,
             group = Group, colour = Group)) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset",
                              "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.05, .05)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = .8),
        legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white")
        )

# Builds one legend-free panel for a count-type outcome.
#   estimates    : data with columns outcome, estimate, conf.low, conf.high, Group
#   y.range      : length-2 numeric, limits of the y axis
#   y.label      : y-axis label
#   x.tick.label : label shown for the single outcome on the x axis
h1.sub2.count.panel <- function(estimates, y.range, y.label, x.tick.label) {
  ggplot(estimates, aes(x = outcome, y = estimate,
                        group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.range[1], y.range[2]) +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = c(x.tick.label)) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8)
          )
}

# plot.attack.h1.sub2 and plot.best.h1.sub2 are taken from the session rather
# than from disk; their `outcome` column presumably comes from the tidy()
# output of the lm_robust fits -- confirm.
PLOT.attack.h1.sub2 <- h1.sub2.count.panel(plot.attack.h1.sub2,
                                           y.range = c(-1.5, 1.5),
                                           y.label = "Change in Predicted Number",
                                           x.tick.label = "Violent Events")

PLOT.best.h1.sub2 <- h1.sub2.count.panel(plot.best.h1.sub2,
                                         y.range = c(-.25, .25),
                                         y.label = "Change in Predicted Number (logged)",
                                         x.tick.label = "Battle Deaths")

# Combine: probability panel (carrying the figure title) next to the two
# count panels; the hjust value shifts the title horizontally
(PLOT.onsetincidence.h1.sub2 + ggtitle('Effect of Refugee Presence on Conflict Outcomes (H1)') +
  theme(plot.title = element_text(hjust= 1.08))) +
  (PLOT.attack.h1.sub2 + PLOT.best.h1.sub2)

@

<<DynamicSubsetOnsetModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Treatment model 3 on the dynamic subset (geddatasub2), years before 2009,
## now including the rtb_rtb.other term alongside rtb and rtb.other.
## excluded_mean is commented out of the covariate set, and the
## prior.scale = 1, prior.df = 3 override is likewise disabled.
onset.h2.sub2 <- bayesglm(
  onset.n ~ rtb + rtb.other + rtb_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub2[geddatasub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub2)

## Placebo model: rtb.placebo and rtb.placebo_rtb.other replace rtb and
## rtb_rtb.other, fitted on plagedrtbsub2
onset.h2.sub2.pla <- bayesglm(
  onset.n ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other + onset.n_1 +
    attack_neighbors_sum + log_pop_1 + gcp_ppp_1 +
    # excluded_mean +
    STD + SQKM_ADMIN + log_bdist2 + log_capdist + idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub2[plagedrtbsub2$year < 2009, ],
  # prior.scale = 1, prior.df = 3,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub2.pla)

# Run pred.bi.int() on each fit and label the result by estimate type
onset.h2.sub2.dplot <- pred.bi.int(onset.h2.sub2)
onset.h2.sub2.dplot$Group <- "Actual presence"

onset.h2.sub2.pla.dplot <- pred.bi.int(onset.h2.sub2.pla)
onset.h2.sub2.pla.dplot$Group <- "Placebo presence"

# Stack actual and placebo estimates into one table for ggplot
plot.onset.h2.sub2 <- rbind(onset.h2.sub2.dplot,
                            onset.h2.sub2.pla.dplot)

# Saving the output as Rdata files is currently disabled
# save(onset.h2.sub2, file = "onset.h2.sub2.Rdata")
# save(onset.h2.sub2.pla, file = "onset.h2.sub2.pla.Rdata")
# save(plot.onset.h2.sub2, file = "plot.onset.h2.sub2.Rdata")

@

<<DynamicSubsetIncidenceModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Dynamic-subset robustness check, H2 specification, outcome: incidence.
## Fits a treatment model and a placebo model with identical controls, then
## stacks the two pred.bi.int() results into one data frame for plotting.

## Treatment model 3, with subset data.
## Logit via bayesglm() on geddatasub2 rows with year < 2009. Regressors: rtb,
## rtb.other, rtb_rtb.other (presumably their interaction, built elsewhere --
## TODO confirm), the lagged outcome incidence_1, controls, and country and
## year dummies. The order of the first three terms is kept as-is because
## pred.bi.int() is opaque here and may rely on it.
incidence.h2.sub2 <- bayesglm(incidence ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub2[geddatasub2$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub2)

## Placebo model: same specification, but rtb / rtb_rtb.other are replaced by
## rtb.placebo / rtb.placebo_rtb.other and the data come from plagedrtbsub2.
incidence.h2.sub2.pla <- bayesglm(incidence ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub2[plagedrtbsub2$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.sub2.pla)

# Generate predicted probs.
# pred.bi.int() is defined outside this section; its result is treated as a
# data frame below (a Group column is added and the two results are row-bound).
incidence.h2.sub2.dplot <- pred.bi.int(incidence.h2.sub2) #generate predicted probs
incidence.h2.sub2.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h2.sub2.pla.dplot <- pred.bi.int(incidence.h2.sub2.pla)
incidence.h2.sub2.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.incidence.h2.sub2 <- rbind(incidence.h2.sub2.dplot, #combine for ggplot
                                incidence.h2.sub2.pla.dplot)

# save output as Rdata files (disabled)
# save(incidence.h2.sub2, file = "incidence.h2.sub2.Rdata")
# save(incidence.h2.sub2.pla, file = "incidence.h2.sub2.pla.Rdata")
# save(plot.incidence.h2.sub2, file = "plot.incidence.h2.sub2.Rdata")

@

<<DynamicSubsetAttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Dynamic-subset robustness check, H2 specification, outcome: attack.
## Fits a treatment and a placebo linear model with lm_robust(), then stacks
## the two pred.lm.int.r() results into one data frame for plotting.

## Treatment model 3, with subset data.
## Unlike the bayesglm() onset/incidence models, no year filter is applied:
## the full geddatasub2 data frame is used.
attack.h2.sub2 <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub2)

#summary(attack.h2.sub2)

## Placebo model: rtb / rtb_rtb.other replaced by rtb.placebo /
## rtb.placebo_rtb.other, estimated on plagedrtbsub2.
attack.h2.sub2.pla <- lm_robust(attack ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub2)

#summary(attack.h2.sub2.pla)


# Generate the plotted quantities.
# pred.lm.int.r() is defined outside this section; its result is treated as a
# data frame below. NOTE(review): the outcome is not binary, so these are
# presumably predicted changes rather than probabilities -- confirm against
# the helper.
attack.h2.sub2.dplot <- pred.lm.int.r(attack.h2.sub2)
attack.h2.sub2.dplot$Group <- "Actual presence" #label the type of estimate

attack.h2.sub2.pla.dplot <- pred.lm.int.r(attack.h2.sub2.pla)
attack.h2.sub2.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.attack.h2.sub2 <- rbind(attack.h2.sub2.dplot, #combine for ggplot
                             attack.h2.sub2.pla.dplot)
@

<<DynamicSubsetBattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Dynamic-subset robustness check, H2 specification, outcome: log(best + 1).
## Fits a treatment and a placebo linear model with lm_robust(), then stacks
## the two pred.lm.int.r() results into one data frame for plotting.

## Treatment model 3, with subset data.
## Both the outcome and its lag enter as log(x + 1), computed inside the
## formula. The full geddatasub2 data frame is used (no year filter).
best.h2.sub2 <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub2)

#summary(best.h2.sub2)

## Placebo model: rtb / rtb_rtb.other replaced by rtb.placebo /
## rtb.placebo_rtb.other, estimated on plagedrtbsub2.
best.h2.sub2.pla <- lm_robust(log(best+1) ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year), 
               data = plagedrtbsub2)

#summary(best.h2.sub2.pla)


# Generate the plotted quantities.
# pred.lm.int.r() is defined outside this section; its result is treated as a
# data frame below. NOTE(review): the outcome is a logged count, so these are
# presumably predicted changes rather than probabilities -- confirm against
# the helper.
best.h2.sub2.dplot <- pred.lm.int.r(best.h2.sub2)
best.h2.sub2.dplot$Group <- "Actual presence" #label the type of estimate

best.h2.sub2.pla.dplot <- pred.lm.int.r(best.h2.sub2.pla)
best.h2.sub2.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.best.h2.sub2 <- rbind(best.h2.sub2.dplot, #combine for ggplot
                           best.h2.sub2.pla.dplot)

@

% PRED PROBS FIGURE FOR H2: DYNAMIC SUBSET
<<DynamicSubsetModelsH2_sub2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.width = 9, fig.height = 6, out.width= "1\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH2_sub} with the dynamic subset data.")>>=

# Figure: H2 estimates (actual vs. placebo presence) for the DYNAMIC SUBSET.
#
# The data plotted here are the plot.*.h2.sub2 tables built in the four
# DynamicSubset*ModelsH2 chunks. (This chunk previously read the plot.*.h2.sub
# tables, so it re-drew the main-model estimates instead of the dynamic-subset
# ones.)
#
# The PLOT.* object names are kept unchanged so any code that refers to them
# after this chunk still finds them.
#
# NOTE(review): the axis limits below are unchanged. ggplot2 scale limits drop
# points/intervals that fall outside them, so confirm that every
# dynamic-subset estimate and interval lies inside the limits.

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h2.sub2.Rdata")
# load("plot.incidence.h2.sub2.Rdata")

# Tag every estimate table with the outcome it belongs to (used as the x aesthetic)
plot.onset.h2.sub2$outcome <- "onset"
plot.incidence.h2.sub2$outcome <- "incidence"
plot.attack.h2.sub2$outcome <- "attack"
plot.best.h2.sub2$outcome <- "log(best + 1)"

# plots
pd <- position_dodge(0.5) # dodge width 0.5: separates the actual and placebo markers

# Onset + incidence, rows with Plot == 3 (the panel titled H2a below).
# fct_relevel() puts "onset" first so it matches the x-axis labels.
PLOT.onsetincidence.h2a.sub <- bind_rows(plot.onset.h2.sub2[plot.onset.h2.sub2$Plot == 3,],
          plot.incidence.h2.sub2[plot.incidence.h2.sub2$Plot ==3,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>%
  ggplot(aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  # CIL/CIU are coerced through character in case they are stored as factors/strings
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels=c("Onset",
                           "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  # only this panel carries the legend (top-right corner, inside the panel)
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill=NA, size=.8),
        legend.title = element_blank(),
        legend.justification = c(1, 1), 
        legend.position= c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical", 
        legend.key = element_rect(colour = "transparent", fill = "white")
        ) 

# Onset + incidence, rows with Plot == 6 (the panel titled H2b below)
PLOT.onsetincidence.h2b.sub <- bind_rows(plot.onset.h2.sub2[plot.onset.h2.sub2$Plot == 6,],
          plot.incidence.h2.sub2[plot.incidence.h2.sub2$Plot ==6,]) %>%
  mutate(outcome = fct_relevel(outcome, "onset")) %>%
  ggplot(aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels=c("Onset",
                           "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(-.12, .12)) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Violent events, H2a: rows 1 and 3 of the stacked table.
# Assumes pred.lm.int.r() returns two rows per model, so rows 1-2 are the
# actual model and rows 3-4 the placebo model -- TODO confirm.
PLOT.attack.h2a.sub <- ggplot(plot.attack.h2.sub2[c(1,3),], aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylim(-3,3) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels=c("Violent Events")) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Violent events, H2b: rows 2 and 4 of the stacked table (same row assumption)
PLOT.attack.h2b.sub <- ggplot(plot.attack.h2.sub2[c(2,4),], aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylim(-3,3) +
  ylab("Change in Predicted Number") +
  xlab("") +
  scale_x_discrete(labels=c("Violent Events")) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Battle deaths (logged), H2a: rows 1 and 3 of the stacked table
PLOT.best.h2a.sub <- ggplot(plot.best.h2.sub2[c(1,3),], aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels=c("Battle Deaths")) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Battle deaths (logged), H2b: rows 2 and 4 of the stacked table
PLOT.best.h2b.sub <- ggplot(plot.best.h2.sub2[c(2,4),], aes(x = outcome, y = Means, 
                           group=Group, colour=Group
                        )) + 
  geom_point(position=pd, size=3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)), 
                    ymax = as.numeric(as.character(CIU))), 
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept=0)) +
  ylim(-.5,.5) +
  ylab("Change in Predicted Number (logged)") +
  xlab("") +
  scale_x_discrete(labels=c("Battle Deaths")) +
  scale_colour_manual(values=c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.border = element_rect(colour = "gray", fill=NA, size=.8)
        )

# Compose the two rows. The titles are attached to the left-hand panel and
# shifted with hand-tuned hjust values, which are left exactly as they were.
patch1 <- (PLOT.onsetincidence.h2a.sub + ggtitle("Effect of Concentrated Refugee Presence (H2a)") + 
  theme(plot.title = element_text(hjust= 13))) +
  (PLOT.attack.h2a.sub + PLOT.best.h2a.sub) 

patch2 <- (PLOT.onsetincidence.h2b.sub + ggtitle("Effect of Dispersed Refugee Presence (H2b)") + 
  theme(plot.title = element_text(hjust= -2.9))) +
  (PLOT.attack.h2b.sub + PLOT.best.h2b.sub) 

# H2a row stacked on top of the H2b row; this expression prints the figure
patch1/patch2


@


% Lead the DVS
\clearpage
\subsection{Leading the Dependent Variables by 5 Years}
\label{SIsec:Lead5Analysis}

This section repeats the main analysis, replacing the conflict onset and incidence outcome variables with conflict onset and incidence five years into the future (a five-year lead), to confirm that there are no delayed effects of the presence of refugee sites on conflict. Note that several of the placebo tests cannot rule out selection effects; however, any such selection would imply that refugees are selecting into more conflict-prone areas, not into less conflict-prone ones. 

<<Lead5OnsetModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H1 specification, outcome: onset.n5.
## Fits a treatment and a placebo logit with bayesglm(), then stacks the two
## pred.bi() results into one data frame for plotting.

## Treatment model 3, with subset data.
## The outcome is the lead variable onset.n5; the lagged-outcome control is
## still onset.n_1. Estimated on geddatasub rows with year < 2009.
onset.h1.subl5 <- bayesglm(onset.n5 ~ rtb + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h1.subl5)

## Placebo model: rtb replaced by rtb.placebo, estimated on plagedrtbsub
## rows with year < 2009.
onset.h1.subl5.pla <- bayesglm(onset.n5 ~ rtb.placebo + 
                       rtb.other + 
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h1.subl5.pla)

# Generate predicted probs.
# pred.bi() is defined outside this section. The figure chunk for this
# subsection reads the columns Plot, Means, CIL and CIU from the stacked result.
onset.h1.subl5.dplot <- pred.bi(onset.h1.subl5) #generate predicted probs
onset.h1.subl5.dplot$Group <- "Actual presence" #label the type of estimate

onset.h1.subl5.pla.dplot <- pred.bi(onset.h1.subl5.pla)
onset.h1.subl5.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.onset.h1.subl5 <- rbind(onset.h1.subl5.dplot, #combine for ggplot
                             onset.h1.subl5.pla.dplot)

# save output as Rdata files (disabled)
# save(onset.h1.subl5, file = "onset.h1.subl5.Rdata")
# save(onset.h1.subl5.pla, file = "onset.h1.subl5.pla.Rdata")
# save(plot.onset.h1.subl5, file = "plot.onset.h1.subl5.Rdata")

@

<<Lead5IncidenceModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H1 specification, outcome: incidence5.
## Fits a treatment and a placebo logit with bayesglm(), then stacks the two
## pred.bi() results into one data frame for plotting.

## Treatment model 3, with subset data.
## The outcome is the lead variable incidence5; the lagged-outcome control is
## still incidence_1. Estimated on geddatasub rows with year < 2009.
incidence.h1.subl5 <- bayesglm(incidence5 ~ rtb + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(incidence.h1.subl5)

## Placebo model: rtb replaced by rtb.placebo, estimated on plagedrtbsub
## rows with year < 2009.
incidence.h1.subl5.pla <- bayesglm(incidence5 ~ rtb.placebo + 
                       rtb.other + 
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3,
               family = binomial(link = "logit")) 

#summary(incidence.h1.subl5.pla)

# Generate predicted probs.
# pred.bi() is defined outside this section. The figure chunk for this
# subsection reads the columns Plot, Means, CIL and CIU from the stacked result.
incidence.h1.subl5.dplot <- pred.bi(incidence.h1.subl5) #generate predicted probs
incidence.h1.subl5.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h1.subl5.pla.dplot <- pred.bi(incidence.h1.subl5.pla)
incidence.h1.subl5.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.incidence.h1.subl5 <- rbind(incidence.h1.subl5.dplot, #combine for ggplot
                           incidence.h1.subl5.pla.dplot)

# save output as Rdata files (disabled)
# save(incidence.h1.subl5, file = "incidence.h1.subl5.Rdata")
# save(incidence.h1.subl5.pla, file = "incidence.h1.subl5.pla.Rdata")
# save(plot.incidence.h1.subl5, file = "plot.incidence.h1.subl5.Rdata")

@

<<Lead5AttackModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H1 specification, outcome: attack5.
## Fits a treatment and a placebo linear model with lm_robust(), then keeps
## the coefficient row of the presence variable from each for plotting.

## Treatment model 3, with subset data.
## The outcome is the lead variable attack5; the lagged-outcome control is
## still attack_1. The full geddatasub data frame is used (no year filter).
attack.h1.subl5 <- lm_robust(attack5 ~ rtb + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(attack.h1.subl5)

## Placebo model: rtb replaced by rtb.placebo, estimated on plagedrtbsub.
attack.h1.subl5.pla <- lm_robust(attack5 ~ rtb.placebo + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub)

#summary(attack.h1.subl5.pla)


# Extract the coefficient row for the presence variable (these are regression
# coefficients, not predicted probabilities).
# The tidy() table is indexed with a logical vector built from the fitted
# object's own $term element, so this relies on tidy() listing terms in the
# same order as the model object.
# The figure chunk for this subsection reads the columns outcome, estimate,
# conf.low and conf.high from the stacked result.
attack.h1.subl5.dplot <- tidy(attack.h1.subl5)[attack.h1.subl5$term == "rtb",] 
attack.h1.subl5.dplot$Group <- "Actual presence" #label the type of estimate


attack.h1.subl5.pla.dplot <- tidy(attack.h1.subl5.pla)[attack.h1.subl5.pla$term == "rtb.placebo",] 
attack.h1.subl5.pla.dplot$Group <- "Placebo presence"
 
# Actual row first, placebo row second.
plot.attack.h1.subl5 <- rbind(attack.h1.subl5.dplot, #combine for ggplot
                            attack.h1.subl5.pla.dplot)

@

<<Lead5BattleDeathModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H1 specification, outcome: log(best5 + 1).
## Fits a treatment and a placebo linear model with lm_robust(), then keeps
## the coefficient row of the presence variable from each for plotting.

## Treatment model 3, with subset data.
## The outcome is log(best5 + 1); the lagged control is log(best_1 + 1), both
## computed inside the formula. The full geddatasub data frame is used (no
## year filter).
best.h1.subl5 <- lm_robust(log(best5+1) ~ rtb + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = geddatasub)

#summary(best.h1.subl5)

## Placebo model: rtb replaced by rtb.placebo, estimated on plagedrtbsub.
best.h1.subl5.pla <- lm_robust(log(best5+1) ~ rtb.placebo + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),   
               data = plagedrtbsub)

#summary(best.h1.subl5.pla)


# Extract the coefficient row for the presence variable (these are regression
# coefficients, not predicted probabilities).
# The tidy() table is indexed with a logical vector built from the fitted
# object's own $term element, so this relies on tidy() listing terms in the
# same order as the model object.
# The figure chunk for this subsection reads the columns outcome, estimate,
# conf.low and conf.high from the stacked result.
best.h1.subl5.dplot <- tidy(best.h1.subl5)[best.h1.subl5$term == "rtb",] 
best.h1.subl5.dplot$Group <- "Actual presence" #label the type of estimate

best.h1.subl5.pla.dplot <- tidy(best.h1.subl5.pla)[best.h1.subl5.pla$term == "rtb.placebo",] 
best.h1.subl5.pla.dplot$Group <- "Placebo presence" #label the type of estimate

# Actual row first, placebo row second.
plot.best.h1.subl5 <- rbind(best.h1.subl5.dplot, #combine for ggplot
                            best.h1.subl5.pla.dplot)

@

% PRED PROBS FIGURE FOR H1: LEAD 5
<<Lead5ModelsH1_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 3, out.width= ".99\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH1_sub} with a 5-year lead DV.")>>=
 
# Figure: H1 estimates (actual vs. placebo presence) with five-year-lead outcomes.
# Left panel: onset and incidence from the pred.bi() tables.
# Middle and right panels: the rtb / rtb.placebo coefficient rows for
# attack5 and log(best5 + 1).

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h1.subl5.Rdata")
# load("plot.incidence.h1.subl5.Rdata")

# The two binary-outcome tables need an explicit outcome label for the x axis.
# The attack/battle-death tables are plotted using the outcome column they already carry.
plot.onset.h1.subl5$outcome <- "onset"
plot.incidence.h1.subl5$outcome <- "incidence"

# Horizontal dodge (width 0.5) shared by points and error bars so the actual
# and placebo estimates sit side by side
pd <- position_dodge(0.5)

# --- Panel 1: onset and incidence (rows with Plot == 3), legend in the top-right corner
PLOT.onsetincidence.h1.subl5 <- ggplot(
    data = bind_rows(
      plot.onset.h1.subl5[plot.onset.h1.subl5$Plot == 3, ],
      plot.incidence.h1.subl5[plot.incidence.h1.subl5$Plot == 3, ]
    ) %>%
      mutate(outcome = fct_relevel(outcome, "onset")),  # "onset" first, to match the axis labels
    mapping = aes(x = outcome, y = Means, group = Group, colour = Group)
  ) +
  geom_point(size = 3, position = pd) +
  geom_errorbar(
    aes(ymin = as.numeric(as.character(CIL)),
        ymax = as.numeric(as.character(CIU))),
    position = pd, width = 0, size = .7
  ) +
  geom_hline(aes(yintercept = 0)) +
  labs(x = "", y = "Change in Predicted Probability") +
  scale_x_discrete(labels = c("Onset", "Incidence")) +
  scale_y_continuous(
    limits = c(-.12, .12),
    labels = scales::percent_format(accuracy = 1)
  ) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(
    legend.title         = element_blank(),
    legend.key           = element_rect(colour = "transparent", fill = "white"),
    legend.position      = c(.99, .99),
    legend.justification = c(1, 1),
    legend.direction     = "vertical",
    legend.box           = "horizontal",
    panel.border         = element_rect(colour = "gray", fill = NA, size = .8),
    panel.background     = element_blank()
  )

# --- Panel 2: violent events (coefficient estimate with conf.low / conf.high)
PLOT.attack.h1.subl5 <- ggplot(
    data = plot.attack.h1.subl5,
    mapping = aes(x = outcome, y = estimate, group = Group, colour = Group)
  ) +
  geom_point(size = 3, position = pd) +
  geom_errorbar(
    aes(ymin = as.numeric(as.character(conf.low)),
        ymax = as.numeric(as.character(conf.high))),
    position = pd, width = 0, size = .7
  ) +
  geom_hline(aes(yintercept = 0)) +
  ylim(-5, 5) +
  labs(x = "", y = "Change in Predicted Number") +
  scale_x_discrete(labels = c("Violent Events")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(
    legend.position  = "none",
    panel.border     = element_rect(colour = "gray", fill = NA, size = .8),
    panel.background = element_blank()
  )

# --- Panel 3: battle deaths, logged (coefficient estimate with conf.low / conf.high)
PLOT.best.h1.subl5 <- ggplot(
    data = plot.best.h1.subl5,
    mapping = aes(x = outcome, y = estimate, group = Group, colour = Group)
  ) +
  geom_point(size = 3, position = pd) +
  geom_errorbar(
    aes(ymin = as.numeric(as.character(conf.low)),
        ymax = as.numeric(as.character(conf.high))),
    position = pd, width = 0, size = .7
  ) +
  geom_hline(aes(yintercept = 0)) +
  ylim(-.25, .25) +
  labs(x = "", y = "Change in Predicted Number (logged)") +
  scale_x_discrete(labels = c("Battle Deaths")) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(
    legend.position  = "none",
    panel.border     = element_rect(colour = "gray", fill = NA, size = .8),
    panel.background = element_blank()
  )

# Assemble the row. The title is attached to the first panel and shifted with
# a hand-tuned hjust. Evaluating this expression prints the figure.
(PLOT.onsetincidence.h1.subl5 +
   ggtitle('Effect of Refugee Presence on Conflict Outcomes (H1)') +
   theme(plot.title = element_text(hjust = 1.08))) +
  (PLOT.attack.h1.subl5 + PLOT.best.h1.subl5)

@

<<Lead5OnsetModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H2 specification, outcome: onset.n5.
## Fits a treatment and a placebo logit with bayesglm(), then stacks the two
## pred.bi.int() results into one data frame for plotting.

## Treatment model 3, with subset data.
## Adds rtb_rtb.other (presumably the rtb x rtb.other interaction, built
## elsewhere -- TODO confirm) to the H1 specification. The outcome is the lead
## variable onset.n5; the lagged-outcome control is still onset.n_1.
## Estimated on geddatasub rows with year < 2009.
onset.h2.subl5 <- bayesglm(onset.n5 ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.subl5)

## Placebo model: rtb / rtb_rtb.other replaced by rtb.placebo /
## rtb.placebo_rtb.other, estimated on plagedrtbsub rows with year < 2009.
onset.h2.subl5.pla <- bayesglm(onset.n5 ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       onset.n_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(onset.h2.subl5.pla)

# Generate predicted probs.
# pred.bi.int() is defined outside this section; its result is treated as a
# data frame below (a Group column is added and the two results are row-bound).
onset.h2.subl5.dplot <- pred.bi.int(onset.h2.subl5) #generate predicted probs
onset.h2.subl5.dplot$Group <- "Actual presence" #label the type of estimate

onset.h2.subl5.pla.dplot <- pred.bi.int(onset.h2.subl5.pla)
onset.h2.subl5.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.onset.h2.subl5 <- rbind(onset.h2.subl5.dplot, #combine for ggplot
                           onset.h2.subl5.pla.dplot)

# save output as Rdata files (disabled)
# save(onset.h2.subl5, file = "onset.h2.subl5.Rdata")
# save(onset.h2.subl5.pla, file = "onset.h2.subl5.pla.Rdata")
# save(plot.onset.h2.subl5, file = "plot.onset.h2.subl5.Rdata")

@

<<Lead5IncidenceModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H2 specification, outcome: incidence5.
## Fits a treatment and a placebo logit with bayesglm(), then stacks the two
## pred.bi.int() results into one data frame for plotting.

## Treatment model 3, with subset data.
## Adds rtb_rtb.other (presumably the rtb x rtb.other interaction, built
## elsewhere -- TODO confirm) to the H1 specification. The outcome is the lead
## variable incidence5; the lagged-outcome control is still incidence_1.
## Estimated on geddatasub rows with year < 2009.
incidence.h2.subl5 <- bayesglm(incidence5 ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub[geddatasub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.subl5)

## Placebo model: rtb / rtb_rtb.other replaced by rtb.placebo /
## rtb.placebo_rtb.other, estimated on plagedrtbsub rows with year < 2009.
incidence.h2.subl5.pla <- bayesglm(incidence5 ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       incidence_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub[plagedrtbsub$year < 2009,],
               # prior overrides are disabled, so bayesglm()'s defaults apply:
               #prior.scale = 1, prior.df = 3, 
               family = binomial(link = "logit")) 

#summary(incidence.h2.subl5.pla)

# Generate predicted probs.
# pred.bi.int() is defined outside this section; its result is treated as a
# data frame below (a Group column is added and the two results are row-bound).
incidence.h2.subl5.dplot <- pred.bi.int(incidence.h2.subl5) #generate predicted probs
incidence.h2.subl5.dplot$Group <- "Actual presence" #label the type of estimate

incidence.h2.subl5.pla.dplot <- pred.bi.int(incidence.h2.subl5.pla)
incidence.h2.subl5.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.incidence.h2.subl5 <- rbind(incidence.h2.subl5.dplot, #combine for ggplot
                           incidence.h2.subl5.pla.dplot)

# save output as Rdata files (disabled)
# save(incidence.h2.subl5, file = "incidence.h2.subl5.Rdata")
# save(incidence.h2.subl5.pla, file = "incidence.h2.subl5.pla.Rdata")
# save(plot.incidence.h2.subl5, file = "plot.incidence.h2.subl5.Rdata")

@

<<Lead5AttackModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## Five-year-lead robustness check, H2 specification, outcome: attack5.
## Fits a treatment and a placebo linear model with lm_robust(), then stacks
## the two pred.lm.int.r() results into one data frame for plotting.

## Treatment model 3, with subset data.
## The outcome is the lead variable attack5; the lagged-outcome control is
## still attack_1. The full geddatasub data frame is used (no year filter).
attack.h2.subl5 <- lm_robust(attack5 ~ rtb + 
                       rtb.other + 
                       rtb_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = geddatasub)

#summary(attack.h2.subl5)

## Placebo model: rtb / rtb_rtb.other replaced by rtb.placebo /
## rtb.placebo_rtb.other, estimated on plagedrtbsub.
attack.h2.subl5.pla <- lm_robust(attack5 ~ rtb.placebo + 
                       rtb.other + 
                       rtb.placebo_rtb.other +
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = plagedrtbsub)

#summary(attack.h2.subl5.pla)


# Generate the plotted quantities.
# pred.lm.int.r() is defined outside this section; its result is treated as a
# data frame below. NOTE(review): the outcome is not binary, so these are
# presumably predicted changes rather than probabilities -- confirm against
# the helper.
attack.h2.subl5.dplot <- pred.lm.int.r(attack.h2.subl5)
attack.h2.subl5.dplot$Group <- "Actual presence" #label the type of estimate

attack.h2.subl5.pla.dplot <- pred.lm.int.r(attack.h2.subl5.pla)
attack.h2.subl5.pla.dplot$Group <- "Placebo presence"

# Actual rows first, placebo rows second.
plot.attack.h2.subl5 <- rbind(attack.h2.subl5.dplot, #combine for ggplot
                            attack.h2.subl5.pla.dplot)
@

<<Lead5BattleDeathModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 models for log(best5 + 1) (the lead-5 outcome of this chunk), estimated
## with lm_robust(). The lagged term is log(best_1 + 1); excluded_mean is not
## part of the specification.

## Actual presence, fitted on geddatasub
best.h2.subl5 <- lm_robust(
  log(best5 + 1) ~ rtb + rtb.other + rtb_rtb.other +
    log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

# summary(best.h2.subl5)

## Placebo presence: rtb.placebo and rtb.placebo_rtb.other take the place of
## rtb and rtb_rtb.other, and the model is fitted on plagedrtbsub
best.h2.subl5.pla <- lm_robust(
  log(best5 + 1) ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    log(best_1 + 1) + attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

# summary(best.h2.subl5.pla)

## Pass each fit through pred.lm.int.r() and tag the result with the kind of
## estimate it holds
best.h2.subl5.dplot <- pred.lm.int.r(best.h2.subl5)
best.h2.subl5.dplot$Group <- "Actual presence"

best.h2.subl5.pla.dplot <- pred.lm.int.r(best.h2.subl5.pla)
best.h2.subl5.pla.dplot$Group <- "Placebo presence"

## Stack actual and placebo rows; the figure chunk plots this object
plot.best.h2.subl5 <- rbind(best.h2.subl5.dplot,
                            best.h2.subl5.pla.dplot)

@

% PRED PROBS FIGURE FOR H2: LEAD 5
<<Lead5ModelsH2_sub, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos ='H', fig.width = 9, fig.height = 6, out.width= ".99\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH2_sub} with a 5-year lead DV.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h2.subl5.Rdata")
# load("plot.incidence.h2.subl5.Rdata")

# Lead-5 H2 figure: two rows of three panels (onset/incidence, violent events,
# battle deaths). The top row uses the H2a rows of each estimate table and the
# bottom row the H2b rows; each panel shows actual vs. placebo estimates.

# Tag every estimate table with its outcome; `outcome` is the x-axis variable
plot.onset.h2.subl5$outcome <- "onset"
plot.incidence.h2.subl5$outcome <- "incidence"
plot.attack.h2.subl5$outcome <- "attack"
plot.best.h2.subl5$outcome <- "log(best + 1)"

# Dodge the actual and placebo estimates by 0.5 so they sit side by side
pd <- position_dodge(0.5)

# Builds one panel: points with interval bars (Means, CIL, CIU) per Group, a
# zero reference line, a blank background with a gray frame, and no legend.
#   panel.data  rows to plot; needs outcome, Means, CIL, CIU and Group columns
#   x.labels    tick labels for the discrete x axis
#   y.label     y-axis title
#   y.scale     ggplot component that fixes the y range (ylim() or a y scale)
panel.h2.subl5 <- function(panel.data, x.labels, y.label, y.scale) {
  ggplot(panel.data, aes(x = outcome, y = Means,
                         group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    # CIL/CIU go through as.character() first so factor-coded bounds would be
    # converted by label rather than by level code
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    y.scale +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = x.labels) +
    scale_colour_manual(values = c("black", "gray60")) +
    # "none" replaces the deprecated guides(shape = FALSE)
    guides(shape = "none") +
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8))
}

# Stacks the onset and incidence rows that share one value of `Plot` and puts
# "onset" first on the x axis.
onsetincidence.rows.subl5 <- function(plot.id) {
  bind_rows(plot.onset.h2.subl5[plot.onset.h2.subl5$Plot == plot.id, ],
            plot.incidence.h2.subl5[plot.incidence.h2.subl5$Plot == plot.id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))
}

# Onset / incidence panels: Plot == 3 feeds the H2a row, Plot == 6 the H2b row
PLOT.onsetincidence.h2a.subl5 <- panel.h2.subl5(
  onsetincidence.rows.subl5(3),
  x.labels = c("Onset", "Incidence"),
  y.label = "Change in Predicted Probability",
  y.scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                               limits = c(-.12, .12))
)

PLOT.onsetincidence.h2b.subl5 <- panel.h2.subl5(
  onsetincidence.rows.subl5(6),
  x.labels = c("Onset", "Incidence"),
  y.label = "Change in Predicted Probability",
  y.scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                               limits = c(-.12, .12))
)

# Violent-events panels: rows 1 and 3 feed the H2a row, rows 2 and 4 the H2b row
PLOT.attack.h2a.subl5 <- panel.h2.subl5(
  plot.attack.h2.subl5[c(1, 3), ],
  x.labels = c("Violent Events"),
  y.label = "Change in Predicted Number",
  y.scale = ylim(-7, 7)
)

PLOT.attack.h2b.subl5 <- panel.h2.subl5(
  plot.attack.h2.subl5[c(2, 4), ],
  x.labels = c("Violent Events"),
  y.label = "Change in Predicted Number",
  y.scale = ylim(-7, 7)
)

# Battle-deaths panels, same row selection as the violent-events panels
PLOT.best.h2a.subl5 <- panel.h2.subl5(
  plot.best.h2.subl5[c(1, 3), ],
  x.labels = c("Battle Deaths"),
  y.label = "Change in Predicted Number (logged)",
  y.scale = ylim(-.5, .5)
)

PLOT.best.h2b.subl5 <- panel.h2.subl5(
  plot.best.h2.subl5[c(2, 4), ],
  x.labels = c("Battle Deaths"),
  y.label = "Change in Predicted Number (logged)",
  y.scale = ylim(-.5, .5)
)

# Row titles are left-aligned at size 11, as in the wzone replication of this
# figure. The previous hjust values (13 and -2.9) anchored the titles far
# outside the panel.
patch1 <- (PLOT.onsetincidence.h2a.subl5 +
             ggtitle("Effect of Concentrated Refugee Presence (H2a)") +
             theme(plot.title = element_text(hjust = 0, size = 11))) +
  (PLOT.attack.h2a.subl5 + PLOT.best.h2a.subl5)

patch2 <- (PLOT.onsetincidence.h2b.subl5 +
             ggtitle("Effect of Dispersed Refugee Presence (H2b)") +
             theme(plot.title = element_text(hjust = 0, size = 11))) +
  (PLOT.attack.h2b.subl5 + PLOT.best.h2b.subl5)

# H2a row on top of the H2b row
patch1 / patch2


@


%% ANALYSIS USING WZONE
\newpage
\subsection{Analysis using wzoneData conflict outcomes}
\label{SIsec:wzone}

For analyses examining conflict onset and incidence, we replace PRIO Conflict Circles (1990--2008) with wzoneData Conflict Polygons (1990--2018) \citep{Kikuta:2020}. The conflict polygons from wzoneData are created from UCDP GED 19.1 conflict events data \citep{Sundberg:2013,Pettersson:2020}, using a machine learning method. Note that this version of wzoneData excludes conflict events from the Syrian Civil War, activity by the Islamic State, and the US wars in Afghanistan and Iraq. The effects shown for the number of violent events and the logged number of battle deaths are the same as in the paper, based on UCDP GED 19.1 conflict events data \citep{Sundberg:2013,Pettersson:2020}.

With this alternative measure of onset and incidence, Figure~\ref{fig:MainModelsH1_sub_wzone} confirms that there is still no effect of refugee presence on these outcomes. For H2, the \textit{conditional risk reduction effects} are now null.

<<MainOnsetModelsH1_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 onset models on the wzoneData outcome (onset.n_wzone), estimated with
## bayesglm() and a logit link. No prior.scale / prior.df is passed, so
## bayesglm() runs with its default prior settings. attack_neighbors_sum,
## excluded_mean and log(bdist3) are not part of the specification.

## Actual presence, fitted on geddatasub
onset.h1.sub.w <- bayesglm(
  onset.n_wzone ~ rtb + rtb.other +
    onset.n_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub.w)

## Placebo presence: rtb.placebo takes the place of rtb, and the model is
## fitted on plagedrtbsub
onset.h1.sub.pla.w <- bayesglm(
  onset.n_wzone ~ rtb.placebo + rtb.other +
    onset.n_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub,
  family = binomial(link = "logit")
)

# summary(onset.h1.sub.pla.w)

## Pass each fit through pred.bi() and tag the result with the kind of
## estimate it holds
onset.h1.sub.dplot.w <- pred.bi(onset.h1.sub.w)
onset.h1.sub.dplot.w$Group <- "Actual presence"

onset.h1.sub.pla.dplot.w <- pred.bi(onset.h1.sub.pla.w)
onset.h1.sub.pla.dplot.w$Group <- "Placebo presence"

## Stack actual and placebo rows; the figure chunk plots this object
plot.onset.h1.sub.w <- rbind(onset.h1.sub.dplot.w,
                             onset.h1.sub.pla.dplot.w)

## Optional: keep the fits and the plot data as Rdata files
# save(onset.h1.sub.w, file = "onset.h1.sub.w.Rdata")
# save(onset.h1.sub.pla.w, file = "onset.h1.sub.pla.w.Rdata")
# save(plot.onset.h1.sub.w, file = "plot.onset.h1.sub.w.Rdata")

@

<<MainIncidenceModelsH1_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 incidence models on the wzoneData outcome (incidence_wzone), estimated
## with bayesglm() and a logit link. No prior.scale / prior.df is passed, so
## bayesglm() runs with its default prior settings. attack_neighbors_sum,
## excluded_mean and log(bdist3) are not part of the specification.

## Actual presence, fitted on geddatasub
incidence.h1.sub.w <- bayesglm(
  incidence_wzone ~ rtb + rtb.other +
    incidence_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub.w)

## Placebo presence: rtb.placebo takes the place of rtb, and the model is
## fitted on plagedrtbsub
incidence.h1.sub.pla.w <- bayesglm(
  incidence_wzone ~ rtb.placebo + rtb.other +
    incidence_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub,
  family = binomial(link = "logit")
)

# summary(incidence.h1.sub.pla.w)

## Pass each fit through pred.bi() and tag the result with the kind of
## estimate it holds
incidence.h1.sub.dplot.w <- pred.bi(incidence.h1.sub.w)
incidence.h1.sub.dplot.w$Group <- "Actual presence"

incidence.h1.sub.pla.dplot.w <- pred.bi(incidence.h1.sub.pla.w)
incidence.h1.sub.pla.dplot.w$Group <- "Placebo presence"

## Stack actual and placebo rows; the figure chunk plots this object
plot.incidence.h1.sub.w <- rbind(incidence.h1.sub.dplot.w,
                                 incidence.h1.sub.pla.dplot.w)

## Optional: keep the fits and the plot data as Rdata files
# save(incidence.h1.sub.w, file = "incidence.h1.sub.w.Rdata")
# save(incidence.h1.sub.pla.w, file = "incidence.h1.sub.pla.w.Rdata")
# save(plot.incidence.h1.sub.w, file = "plot.incidence.h1.sub.w.Rdata")

@

% PRED PROBS FIGURE FOR H1: WZONE
<<MainModelsH1_sub_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 9, fig.height = 3, out.width= ".99\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH1_sub} using wzoneData-based Conflict Onset and Incidence measures.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h1.sub.w.Rdata")
# load("plot.incidence.h1.sub.w.Rdata")

# H1 figure with wzoneData onset/incidence: one row of three panels. The
# violent-events and battle-deaths panels reuse plot.attack.h1.sub and
# plot.best.h1.sub, which this chunk reads but does not create.

# Tag the two wzone estimate tables with their outcome (the x-axis variable)
plot.onset.h1.sub.w$outcome <- "onset"
plot.incidence.h1.sub.w$outcome <- "incidence"

# Dodge the actual and placebo estimates by 0.5 so they sit side by side
pd <- position_dodge(0.5)

# Onset / incidence panel: Plot == 3 rows of both tables, "onset" first on the
# x axis, with the legend drawn inside the top-right corner of the panel
PLOT.onsetincidence.h1.sub <- ggplot(
  mutate(
    bind_rows(plot.onset.h1.sub.w[plot.onset.h1.sub.w$Plot == 3, ],
              plot.incidence.h1.sub.w[plot.incidence.h1.sub.w$Plot == 3, ]),
    outcome = fct_relevel(outcome, "onset")
  ),
  aes(x = outcome, y = Means, group = Group, colour = Group)
) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                    ymax = as.numeric(as.character(CIU))),
                width = 0, size = .7, position = pd) +
  geom_hline(aes(yintercept = 0)) +
  ylab("Change in Predicted Probability") +
  xlab("") +
  scale_x_discrete(labels = c("Onset", "Incidence")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(-.15, .15)) +
  scale_colour_manual(values = c("black", "gray60")) +
  guides(shape = FALSE) +
  theme(panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = .8),
        legend.title = element_blank(),
        legend.justification = c(1, 1),
        legend.position = c(.99, .99),
        legend.box = "horizontal",
        legend.direction = "vertical",
        legend.key = element_rect(colour = "transparent", fill = "white"))

# Builds one legend-free panel from a table with estimate / conf.low /
# conf.high columns.
#   est.data  rows to plot; needs outcome, estimate, conf.low, conf.high, Group
#   x.label   tick label for the single x-axis category
#   y.label   y-axis title
#   y.low, y.high  limits handed to ylim()
lm.panel.h1.wzone <- function(est.data, x.label, y.label, y.low, y.high) {
  ggplot(est.data, aes(x = outcome, y = estimate,
                       group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(conf.low)),
                      ymax = as.numeric(as.character(conf.high))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    ylim(y.low, y.high) +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = x.label) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    theme(panel.background = element_blank(),
          legend.position = "none",
          panel.border = element_rect(colour = "gray", fill = NA, size = .8))
}

PLOT.attack.h1.sub <- lm.panel.h1.wzone(plot.attack.h1.sub,
                                        x.label = c("Violent Events"),
                                        y.label = "Change in Predicted Number",
                                        y.low = -1.5, y.high = 1.5)

PLOT.best.h1.sub <- lm.panel.h1.wzone(plot.best.h1.sub,
                                      x.label = c("Battle Deaths"),
                                      y.label = "Change in Predicted Number (logged)",
                                      y.low = -.25, y.high = .25)

# Compose the row; the title is attached to the first panel
(PLOT.onsetincidence.h1.sub +
   ggtitle('Effect of Refugee Presence on Conflict Outcomes (H1)') +
   theme(plot.title = element_text(hjust = 0, size = 11))) +
  (PLOT.attack.h1.sub + PLOT.best.h1.sub)

@


<<MainOnsetModelsH2_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 onset models on the wzoneData outcome (onset.n_wzone), estimated with
## bayesglm() and a logit link. The right-hand side carries rtb, rtb.other and
## rtb_rtb.other. No prior.scale / prior.df is passed, so bayesglm() runs with
## its default prior settings. attack_neighbors_sum, excluded_mean and
## log(bdist3) are not part of the specification.

## Actual presence, fitted on geddatasub
onset.h2.sub.w <- bayesglm(
  onset.n_wzone ~ rtb + rtb.other + rtb_rtb.other +
    onset.n_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub.w)

## Placebo presence: rtb.placebo and rtb.placebo_rtb.other take the place of
## rtb and rtb_rtb.other, and the model is fitted on plagedrtbsub
onset.h2.sub.pla.w <- bayesglm(
  onset.n_wzone ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    onset.n_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub,
  family = binomial(link = "logit")
)

# summary(onset.h2.sub.pla.w)

## Pass each fit through pred.bi.int() and tag the result with the kind of
## estimate it holds
onset.h2.sub.dplot.w <- pred.bi.int(onset.h2.sub.w)
onset.h2.sub.dplot.w$Group <- "Actual presence"

onset.h2.sub.pla.dplot.w <- pred.bi.int(onset.h2.sub.pla.w)
onset.h2.sub.pla.dplot.w$Group <- "Placebo presence"

## Stack actual and placebo rows; the figure chunk plots this object
plot.onset.h2.sub.w <- rbind(onset.h2.sub.dplot.w,
                             onset.h2.sub.pla.dplot.w)

## Optional: keep the fits and the plot data as Rdata files
# save(onset.h2.sub.w, file = "onset.h2.sub.w.Rdata")
# save(onset.h2.sub.pla.w, file = "onset.h2.sub.pla.w.Rdata")
# save(plot.onset.h2.sub.w, file = "plot.onset.h2.sub.w.Rdata")

@

<<MainIncidenceModelsH2_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 incidence models on the wzoneData outcome (incidence_wzone), estimated
## with bayesglm() and a logit link. The right-hand side carries rtb,
## rtb.other and rtb_rtb.other. No prior.scale / prior.df is passed, so
## bayesglm() runs with its default prior settings. attack_neighbors_sum,
## excluded_mean and log(bdist3) are not part of the specification.

## Actual presence, fitted on geddatasub
incidence.h2.sub.w <- bayesglm(
  incidence_wzone ~ rtb + rtb.other + rtb_rtb.other +
    incidence_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub,
  family = binomial(link = "logit")
)

# summary(incidence.h2.sub.w)

## Placebo presence: rtb.placebo and rtb.placebo_rtb.other take the place of
## rtb and rtb_rtb.other, and the model is fitted on plagedrtbsub
incidence.h2.sub.pla.w <- bayesglm(
  incidence_wzone ~ rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    incidence_wzone_1 + incidence_wzone_neighbor +
    log_pop_1 + gcp_ppp_1 +
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub,
  family = binomial(link = "logit")
)

# summary(incidence.h2.sub.pla.w)

## Pass each fit through pred.bi.int() and tag the result with the kind of
## estimate it holds
incidence.h2.sub.dplot.w <- pred.bi.int(incidence.h2.sub.w)
incidence.h2.sub.dplot.w$Group <- "Actual presence"

incidence.h2.sub.pla.dplot.w <- pred.bi.int(incidence.h2.sub.pla.w)
incidence.h2.sub.pla.dplot.w$Group <- "Placebo presence"

## Stack actual and placebo rows; the figure chunk plots this object
plot.incidence.h2.sub.w <- rbind(incidence.h2.sub.dplot.w,
                                 incidence.h2.sub.pla.dplot.w)

## Optional: keep the fits and the plot data as Rdata files
# save(incidence.h2.sub.w, file = "incidence.h2.sub.w.Rdata")
# save(incidence.h2.sub.pla.w, file = "incidence.h2.sub.pla.w.Rdata")
# save(plot.incidence.h2.sub.w, file = "plot.incidence.h2.sub.w.Rdata")

@

% PRED PROBS FIGURE FOR H2: WZONE
<<MainModelsH2_sub_wzone, eval=TRUE, echo = FALSE, tidy=TRUE, fig.pos ='H', fig.width = 9, fig.height = 6, out.width= ".99\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, fig.cap= paste("This figure replicates Figure \\ref{fig:MainModelsH2_sub} using wzoneData-based Conflict Onset and Incidence measures.")>>=

# setwd("Paper_Inputs")
# 
# # Bring in RData from UCDP data
# load("plot.onset.h2.sub.w.Rdata")
# load("plot.incidence.h2.sub.w.Rdata")

# H2 figure with wzoneData onset/incidence: two rows of three panels. The top
# row uses the H2a rows of each estimate table and the bottom row the H2b
# rows. plot.attack.h2.sub and plot.best.h2.sub are read here but created
# outside this chunk.

# Tag every estimate table with its outcome; `outcome` is the x-axis variable
plot.onset.h2.sub.w$outcome <- "onset"
plot.incidence.h2.sub.w$outcome <- "incidence"
plot.attack.h2.sub$outcome <- "attack"
plot.best.h2.sub$outcome <- "log(best + 1)"

# Dodge the actual and placebo estimates by 0.5 so they sit side by side
pd <- position_dodge(0.5)

# Theme for panels without a legend: blank background and a gray frame
frame.theme.h2.wzone <- theme(panel.background = element_blank(),
                              legend.position = "none",
                              panel.border = element_rect(colour = "gray", fill = NA, size = .8))

# Theme for the one panel that carries the legend, drawn inside its top-right
# corner
legend.theme.h2.wzone <- theme(panel.background = element_blank(),
                               panel.border = element_rect(colour = "gray", fill = NA, size = .8),
                               legend.title = element_blank(),
                               legend.justification = c(1, 1),
                               legend.position = c(.99, .99),
                               legend.box = "horizontal",
                               legend.direction = "vertical",
                               legend.key = element_rect(colour = "transparent", fill = "white"))

# Builds one panel: points with interval bars (Means, CIL, CIU) per Group and
# a zero reference line.
#   panel.data   rows to plot; needs outcome, Means, CIL, CIU and Group columns
#   x.labels     tick labels for the discrete x axis
#   y.label      y-axis title
#   y.scale      ggplot component that fixes the y range (ylim() or a y scale)
#   panel.theme  theme object added last
panel.h2.wzone <- function(panel.data, x.labels, y.label, y.scale,
                           panel.theme = frame.theme.h2.wzone) {
  ggplot(panel.data, aes(x = outcome, y = Means,
                         group = Group, colour = Group)) +
    geom_point(position = pd, size = 3) +
    geom_errorbar(aes(ymin = as.numeric(as.character(CIL)),
                      ymax = as.numeric(as.character(CIU))),
                  width = 0, size = .7, position = pd) +
    geom_hline(aes(yintercept = 0)) +
    y.scale +
    ylab(y.label) +
    xlab("") +
    scale_x_discrete(labels = x.labels) +
    scale_colour_manual(values = c("black", "gray60")) +
    guides(shape = FALSE) +
    panel.theme
}

# Stacks the wzone onset and incidence rows that share one value of `Plot`
# and puts "onset" first on the x axis.
onsetincidence.rows.wzone <- function(plot.id) {
  bind_rows(plot.onset.h2.sub.w[plot.onset.h2.sub.w$Plot == plot.id, ],
            plot.incidence.h2.sub.w[plot.incidence.h2.sub.w$Plot == plot.id, ]) %>%
    mutate(outcome = fct_relevel(outcome, "onset"))
}

# Onset / incidence panels: Plot == 3 feeds the H2a row (with the legend),
# Plot == 6 the H2b row
PLOT.onsetincidence.h2a.sub <- panel.h2.wzone(
  onsetincidence.rows.wzone(3),
  x.labels = c("Onset", "Incidence"),
  y.label = "Change in Predicted Probability",
  y.scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                               limits = c(-.2, .2)),
  panel.theme = legend.theme.h2.wzone
)

PLOT.onsetincidence.h2b.sub <- panel.h2.wzone(
  onsetincidence.rows.wzone(6),
  x.labels = c("Onset", "Incidence"),
  y.label = "Change in Predicted Probability",
  y.scale = scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                               limits = c(-.2, .2))
)

# Violent-events panels: rows 1 and 3 feed the H2a row, rows 2 and 4 the H2b row
PLOT.attack.h2a.sub <- panel.h2.wzone(
  plot.attack.h2.sub[c(1, 3), ],
  x.labels = c("Violent Events"),
  y.label = "Change in Predicted Number",
  y.scale = ylim(-3, 3)
)

PLOT.attack.h2b.sub <- panel.h2.wzone(
  plot.attack.h2.sub[c(2, 4), ],
  x.labels = c("Violent Events"),
  y.label = "Change in Predicted Number",
  y.scale = ylim(-3, 3)
)

# Battle-deaths panels, same row selection as the violent-events panels
PLOT.best.h2a.sub <- panel.h2.wzone(
  plot.best.h2.sub[c(1, 3), ],
  x.labels = c("Battle Deaths"),
  y.label = "Change in Predicted Number (logged)",
  y.scale = ylim(-.5, .5)
)

PLOT.best.h2b.sub <- panel.h2.wzone(
  plot.best.h2.sub[c(2, 4), ],
  x.labels = c("Battle Deaths"),
  y.label = "Change in Predicted Number (logged)",
  y.scale = ylim(-.5, .5)
)

# Each row: titled onset/incidence panel followed by the two count panels
patch1 <- (PLOT.onsetincidence.h2a.sub +
             ggtitle("Effect of Concentrated Refugee Presence (H2a)") +
             theme(plot.title = element_text(hjust = 0, size = 11))) +
  (PLOT.attack.h2a.sub + PLOT.best.h2a.sub)

patch2 <- (PLOT.onsetincidence.h2b.sub +
             ggtitle("Effect of Dispersed Refugee Presence (H2b)") +
             theme(plot.title = element_text(hjust = 0, size = 11))) +
  (PLOT.attack.h2b.sub + PLOT.best.h2b.sub)

# H2a row on top of the H2b row
patch1 / patch2

@



%%%%% RELEVANT EXPERT INTERVIEW QUOTES
\clearpage
\newpage
\section{Expert Interviews with UNHCR Officials} % Use key with officials' names == position titles 
\label{SIsec:interviews}

In 2018, we interviewed twelve senior officials of the UNHCR about their expert knowledge of UNHCR's current and historical policies and practices; refugee issues in general; and their observations of refugee-host relations and refugee-hosting dynamics in the countries where they had worked in their capacity as UNHCR officials. To be clear, these were not elected public officials.
Our institution's IRB advised us that because we were interviewing experts on their professional expertise (external topic) rather than asking any personal information, we did not need to submit a protocol for IRB review.

With respect to the specific principles outlined in the 2020 APSA Principles and Guidance for Human Subjects Research\footnote{\url{https://connect.apsanet.org/hsr/principles-and-guidance/}}, we obtained informed written (by scheduling over email with our project summary and list of possible questions) and oral (at the start of the interview) consent. Our research involved no to minimal harm or trauma. We did not use any deception in our interviews. We were also considerate of the experts' time by limiting our interviews to 1--2 hours. We do not expect their reputations to be negatively affected, especially since we keep their identities confidential. We do not name them, but rather only refer to them by their position or title.

Below, we include additional longer-form quotes from our expert interviews categorized by the following topics.


{\parindent0pt

\setstretch{1.2}  

\subsection{General null effects of refugee-hosting on conflict}

``Despite all the hysteria of refugees and conflict, refugees account for a small part of the population. This is an eminently manageable problem. Those populations are predominantly found in the poorer countries of the developing world. The Syrian arrivals may have distorted that picture. Those kind of figures in Uganda and in Bangladesh, they have been managed without disastrous consequences by governments with much fewer resources. What occurred in Europe is a political crisis, it's not a refugee crisis... I'm very familiar with the Afghan situation and in 2013 and 2014 with the Syrian, and I've never see a conflict at scale. Of course, like in Lebanon where the country's situation has had to … under normal demographic situations, the would only reach it in 2050 if not for the refugees. And they themselves emerged from a long civil war, and turbulent relations with Syria. It's quite remarkable that there hasn't been any major conflict.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.\\

``Certainly there are instances where the presence of refugees creates [non-violent] conflict with the local community like competition for resources, access to fire wood collection, environmental impact. You see those kinds of immediate impact. In a broader sense, I have never seen a situation where refugees have fueled [violent] conflict.'' -- Senior Policy Officer, interview conducted on July 23, 2018.

\subsection{On militarized refugees and prevention}

``From the Kakuma perspective, the refugee camps are of a civilian nature and we try to maintain it that way. When we have identified armed elements, we tell the government and their security apparatus could separate them from the civilians. It is mainly the government who identifies them at the entry points, so the government interviews them. Armed elements are not simply child soldiers, but known commanders. It's not of course perfect, but once they identify them. Child soldiers are a different ballgame, the civilian asylum system does not mean they are not ex-combatants. When they come and they are not engaged in any military activity then they are accepted. As children, once they come, in interviews and in schools, the teachers and social workers realize that there is something and when the method is brought to us, you try to pay much more closer attention to that. Not like in West Africa, we do not see many child soldiers. Our efforts try to prevent them from becoming combatants. They might choose to go back to the other side of the border to join military action. We are engaging in vocational training, sports, arts, we are creating much positive alternatives.'' -- Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.

\subsection{Refugee-host relations}

``Based on my experience, what you see often is that refugees go to parts of the country, cross the border not too far where they came from, and the border that you see is artificial. You have on both sides of the border population that know each other from the same clan. Like Dadaab the Somalis on either side. That's the first element, you have communities which know each other who have been moving up and down, not because of conflict, due to drought and famine. These communities know each other and are often homogeneous. In this kind of context, you don't go where you're not welcome or wanted. You move to parts of the region where you know people will not reject you. People who are in a position to provide you some support.'' -- Senior Official at UNHCR Kenya, interview conducted on July 6, 2018.\\

``Second, when large populations across the borders take place, the shock of the demographic expansion can be to a greater or lesser extent, depending on the ethnic affinities across the border, most instances these populations move to areas with similar cultural, religious, linguistic affinities. With the way that the colonial boundaries were drawn in Africa, you find that the populations are very very similar.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.\\

``We would assume that if different communities live side by side or live together, there will be conflict. The working assumption is that there will be conflict. So UNHCR and aid agencies try to put in place prevention mechanisms. And that means conflict between refugees and host populations. That normally is done over time and by creating informal structures, committees, where the local people like elders and chiefs would meet regularly. So that is the understanding that we have. There are other issues where UNHCR would advocate that camps should not be outnumbering local communities. That we limit camps in size. We would assume that in poor countries that the host population would need to be compensated in one way or another for hosting refugees, and that compensation is either giving them employment in camps or giving them, mobilizing other partners to invest in host communities. That is the normal situation. Every community has structures. Even the refugee community is not just some amorphous group of people but they have spokespeople, traditional leaders, more educated than others. We will find these people pretty fast and easily because they are our interlopers. On the side of the host communities, we also have existing institutional structures: state sponsored, traditional chiefs this and that. Depending on circumstances, we would try to sensitize the local government at an early stage on setting up these informal structures in the beginning... What I would call this is the unwritten rulebook. I doubt you will find a lot of in our archives and written instructions. This is in a way a part of the oral culture of UNHCR, colleagues who have been in the business for many years, try what has worked elsewhere and train young colleagues. In the end of the day this is all common sense... The working assumption is that conflict is going to happen and that it needs to be managed, and if not, it might become violent. 
So you better intervene early.'' -- Senior Official at Division of Programme Support and Management, interview conducted on September 12, 2018.

\subsection{Effects of refugee-hosting on development, infrastructure, other services}

``Good relationships between citizens and refugees is a phenomenon that is monitored, and a goal that drives protection interventions, such as infrastructure development or individual assistance programs that are allocated specifically to local communities rather than refugees alone. The intent is to help people (refugees) make a life, while benefitting local communities. Unfortunately there is not much data on this.'' -- Senior Regional Protection Officer, interview conducted on June 25, 2018.\\

``There may be a hospital built for the refugees or clinic, and then it is opened to the host community. Schools, and things like that. There's an effort like that is some areas.'' -- Senior Official 1 at FICSS, interview conducted on June 25, 2018.\\

``On the African continent, the assistance provided is very significant, in areas with limited government presence, areas that were not priorities. Garrison in northeastern part of Kenya, the level of illiteracy and poverty. Same thing in Turkana, the presence of refugees brought better schools, better healthcare, brought food assistance.'' -- Senior Official at UNHCR Kenya, interview conducted on July 6, 2018.\\

``On the distribution of humanitarian resources, I suppose the main observation that in terms of when people are concentrated, the tendency is to invest in services and facilities that directly serve that population. I suppose this has been the tradition that when camps are set up the way that things were traditionally done, you basically set up a medical center in the camp, water infrastructure serving that camp, food distribution would be carried out in the camp. The aid would be much more focused on the refugee population. Increasingly over the years, there was a real push to set things up in such a way that the local community was also benefitting from them like health clinics for example, the nearby villages to also benefit from that facility. More recently, the shift has been to how you boost services more generally to areas where refugees are located, enhancing existing services. Where you have a camp based program, what that does do is allow you to concentrated your resources in one location. Whereas if you're working in a context that is much more spread out, in smaller settlements or perhaps not in camps at all, the shift is much more is to boost national and local services rather than set up dedicated facilities or subsidizing refugees through cash assistance. Or you might have a program that targets the most vulnerable among the refugees, and UNHCR reimburses the cost of treatment in the local facilities.'' -- Senior Policy Officer, interview conducted on July 23, 2018.

\subsection{Economic effects of hosting refugees}

``When there is a reasonable number of refugees, it attracts resources of international community. There is a net economic gain in that area... The physical presence of a large number of internationals might have some sort of calming effect, that might change the social and political dynamics.'' -- Senior Official of Shelter \& Settlements Section, interview conducted on July 3, 2018.\\

``If you look at rural sub-Saharan Africa, there is not much money circulating in the villages. If you have a camp where you pay staff, then you create an imbalance. The refugee camps become a big market where locals sell their produce there.'' -- Senior Official at Division of Programme Support and Management, interview conducted on September 12, 2018.\\

``The winners, you have a larger number of consumers, you do have people with capital and skills that can contribute positively to the economy. And employers can find a ready supply of laborers at lower wages. The first 6 months a new equilibrium imposes itself. With resources from the international community, you have strong injections of finance into the local economy. So it's not altogether surprising that you go through the trajectory of at first shock, but then a period of stabilization, and even a changed environment. And things can evolve relatively positively.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.\\

``[On Kakuma] Today if they want to have a job, they come here. Because there is a vibrant economy here, not only with humanitarian agencies. The refugees contribute to 3\% economic growth every year in Kakuma. It is the second biggest city in Kenya, it's open. So it is totally 15 km radius, there are a quarter million people living who need food, washing detergent, whatever you need. They have driving schools, hardware stores, cinemas, restaurants, hotels, name it, bodas bodas, electronics, tailors, sports clubs. Long term, I think Kakuma it is the Kenyan government to maintain this camp. We are now integrating them into the Turkana government and population. And the government there realized that if they don't include this population (15\%) into their development plans, they must because they are an economic engine.'' -- Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.\\

``There are opportunities, particularly in camps, which creates a demand which often becomes a golden goose, an economic hub. Some of the camps in Kenya and Jordan. This is not something that authorities would agree with. They emphasize the negative effects as a funding strategy, and the negative rhetoric leads to negative attitudes because the refugees are being scapegoated.'' -- Senior Official at UNHCR Syria, interview conducted on August 2, 2018.\\

\subsection{Effects of refugee-hosting on local state capacity}

``I think the refugees arrive and there is a lack of state security, UNHCR asks for a significant increase in state security and the international community does, and we usually pay the state to provide additional security. Additional police, military, all kinds of things take place because there are conflicts within the community, but not necessarily because of conflicts surrounding, but just because of a large population that is displaced to ensure that there is some kind of law in order. It is not the responsibility of the humanitarian community to provide law and order, it is the responsibility of the host government, so we always ask for the host government to provide that law and order.'' -- Senior Official 1 at FICSS, interview conducted on June 25, 2018.\\

``It's not only international humanitarian agencies, often times it's the military and the police presence are also there.'' -- Senior Official 2 at FICSS, interview conducted on June 25, 2018.\\

``The other point worth making is that very often these populations have accumulated in border areas which are often the least developed parts of the country. So while the arrival of refugees evokes a security reflex, those in charge of refugees in the government are placed under the ministry of security or ministry of the interior rather than the ministry of social, they tend to attract financing even through humanitarian channels into areas that generally did not have that interest in that part of the country. You have external financing that leverages government fiscal expenditures.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.

\subsection{Differences between refugee camps and settlements}

``In camps, more money is spent per capita on refugees, and there is increased ability to deliver assistance. Camps are more cost effective and provide physical security and services, but they are not meant for people to stay there forever. An example is Iraq, where refugees are hosted in camps for security reasons.'' -- Senior Regional Protection Officer, interview conducted on June 25, 2018.\\

``One thing I think is important to bear in mind, in some of these instances the government policy has been containment into camps. As long as the political will and support is there on the part of the donors to continue to subsidize those arrangements, then the issue of conflict with local citizens does not arise. My view is that [camp-based policy] is not helpful and it's more costly than it needs to be.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.\\

``At UNHCR, we don't advocate for camps. We would like no big concentrations of refugee populations in a place and yet, still the countries prefer a camp style arrangement for various reasons. Some of them might say that `I want more efficiency of programs, I want to make sure that there is no burden on my budgets so I can geographically distinct them, I want for security reasons to control the movements.' There are many reasons why the governments prefer camps.'' -- Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.

\subsection{How contemporary refugee-hosting dynamics differ from the Cold War era}

``Basically your [study] period is after Cold War. The Cold War period was a very distinct period because in Africa, there were a lot of freedom fighters.'' -- Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018.\\

``There's been quite an evolution since the 1980s, refugees camps in Pakistan were used as military bases. Cambodian camps were used as bases to oppose Cambodian occupation. Since that time, that form of politically motivated policy is no longer standard. UNHCR in particular has insisted as much as possible to maintain the civilian characteristic of these camps.'' -- Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018.

\subsection{Suggestions for better policy}

``UNHCR has a standing policy that says camps are good for emergencies, but it's better in the long run to give people freedom of movement. In addition, it could negatively impact a country, by pushing the country into restricting freedom of movement to greater degree by cloistering citizens into a singular province.'' -- Senior Regional Protection Officer, interview conducted on June 25, 2018.\\

``[In Kenya] we've had a number of situations where the government said that refugees were a threat to national security. We hear that around the world now. It's clear that when you have a camp like Dadaab on the border with Somalia where you have an active conflict. It's not a conventional war, it's terrorist attacks. What we've been telling the government, you should have better asylum management and strategy. To register people better, to avoid strict encampment policy to lessen the radicalization of people. We've tried to discuss with the government with a policy towards socio-economic inclusion. Try to move away from strict encampment policy. This is what we're trying to push for. It's better for security than have a concentration of people. The idea is to have a more settlement type than a camp. Then you would still be able to identify the population but the type of assistance will be different. Instead of building a school further away from the host community, you can strengthen existing infrastructure. You look to see what the needs of the host populations are and taking into account. To a certain extent, that is what is being done in Uganda. What you then do is to augment the host community infrastructure.'' -- Senior Official at UNHCR Kenya, interview conducted on July 6, 2018.

} % close parindent

% REGRESSION TABLES 
\clearpage
\newpage
\section{Regression Tables}
\label{SIsec:regressiontables}

\subsection{H2: Concentrated or Dispersed Refugee Presence on Conflict}

<<RegTabMainModelsH2, eval = TRUE, echo = FALSE, tidy=TRUE, fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# Restore the saved H2 onset and incidence models (treatment and placebo).
# The files are addressed with file.path() rather than by calling setwd():
# knitr discourages setwd() inside a chunk and resets the working directory
# once the chunk finishes. Each file is expected to restore the object that
# shares its name -- TODO confirm against the script that wrote them.
load(file.path("Paper_Inputs", "onset.h2.sub.Rdata"))
load(file.path("Paper_Inputs", "onset.h2.sub.pla.Rdata"))
load(file.path("Paper_Inputs", "incidence.h2.sub.Rdata"))
load(file.path("Paper_Inputs", "incidence.h2.sub.pla.Rdata"))

# The attack.* and best.* models are not loaded here, so they must already
# exist in the session when this chunk runs.
# custom.coef.names lists one label per coefficient left after omit.coef
# drops the Country/year terms; reorder.coef then rearranges those labels by
# position, so the two vectors have to stay the same length (18 entries).
print(texreg(list(onset.h2.sub, 
                  onset.h2.sub.pla,
                  incidence.h2.sub,
                  incidence.h2.sub.pla, 
                  attack.h2.sub,
                  attack.h2.sub.pla,
                  best.h2.sub,
                  best.h2.sub.pla),
          custom.model.names = c("Onset", "Onset (p)",
                          "Incidence", "Incidence (p)",
                          "Events", "Events (p)",
                          "Deaths", "Deaths (p)"),
          omit.coef='Country|year',
          include.ci = FALSE,
          include.aic = TRUE, 
          include.bic = FALSE,
          include.loglik = TRUE, 
          include.deviance = FALSE, 
          include.rmse = FALSE,
          custom.coef.names=c("Intercept",
                              "Refugee Presence",
                              "Refugee Presence in Other Provs",
                              "Ref Pres x Ref Pres in Other Provs",
                              "Lagged Onset",
                              "Sum Events in Neighbor Provs",
                              "Lagged Population (logged)",
                              "Lagged GDP",
                              "Terrain Ruggedness",
                              "Province Size (sqkm)",
                              "Distance from border (km logged)",
                              "Distance from capital (km logged)",
                              "IDP Presence",
                              "Placebo Refugee Presence",
                              "Placebo Ref Pres x Ref Pres in Other Provs",
                              "Lagged Incidence",
                              "Lagged Events",
                              "Lagged Deaths"),
          reorder.coef=c(2,14,3,4,15,5,16,17,18,6,7,8,9,10,11,12,13,1),
          caption = "Regression table for H2: Effects of Geographically Concentrated (coefficient for Refugee Presence) or Dispersed (coefficient for Refugee Presence x Other Prov Refugee Presence) Presence on Conflict Outcomes and their respective Placebo models (p). Note that for outcomes Onset and Incidence, the data is from 1990-2008, while Violent Events and Battle Deaths (logged) is 1990-2018. All models include control variables and Country and Year fixed effects.",
          label = "tab:MainModelsH2",
          scalebox='0.77',
          use.packages = FALSE,
          float.pos = "H"))

@

\newpage
\subsection{H3: Conditional Risk Reduction through Increased Infrastructure}
<<NightLightsCModelsH1, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H1 treatment specification for the nighttime-lights outcome, full data
## (geddata). Terms are listed in their original order so that coefficient
## positions are unchanged.
nlights_calib_mean.h1.full <- lm_robust(
  formula = nlights_calib_mean ~
    rtb + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

#summary(nlights_calib_mean.h1.full)

## H1 placebo specification for the nighttime-lights outcome, full placebo
## data (plagedrtb): rtb.placebo is the first term in place of rtb; every
## other term and its position is the same as in the treatment model.
nlights_calib_mean.h1.full.pla <- lm_robust(
  formula = nlights_calib_mean ~
    rtb.placebo + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

#summary(nlights_calib_mean.h1.full.pla)


## H1 treatment specification for the nighttime-lights outcome, subset data
## (geddatasub). Terms are listed in their original order so that coefficient
## positions are unchanged.
nlights_calib_mean.h1.sub <- lm_robust(
  formula = nlights_calib_mean ~
    rtb + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(nlights_calib_mean.h1.sub)

## H1 placebo specification for the nighttime-lights outcome, subset placebo
## data (plagedrtbsub): rtb.placebo is the first term in place of rtb; every
## other term and its position is the same as in the treatment model.
nlights_calib_mean.h1.sub.pla <- lm_robust(
  formula = nlights_calib_mean ~
    rtb.placebo + rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

#summary(nlights_calib_mean.h1.sub.pla)


# Collect the coefficient rows used for plotting the H1 nighttime-lights
# models. These are rows of the tidy() coefficient table (the "rtb" row for
# treatment fits, the "rtb.placebo" row for placebo fits); each is tagged with
# a Group label and the treatment/placebo pair is stacked for ggplot.

# Full data
nlights_calib_mean.h1.full.dplot <- subset(tidy(nlights_calib_mean.h1.full),
                                           term == "rtb")
nlights_calib_mean.h1.full.dplot$Group <- "Actual presence"

nlights_calib_mean.h1.full.pla.dplot <- subset(tidy(nlights_calib_mean.h1.full.pla),
                                               term == "rtb.placebo")
nlights_calib_mean.h1.full.pla.dplot$Group <- "Placebo presence"

plot.nlights_calib_mean.h1.full <- rbind(
  nlights_calib_mean.h1.full.dplot,
  nlights_calib_mean.h1.full.pla.dplot
)

# Subset data
nlights_calib_mean.h1.sub.dplot <- subset(tidy(nlights_calib_mean.h1.sub),
                                          term == "rtb")
nlights_calib_mean.h1.sub.dplot$Group <- "Actual presence"

nlights_calib_mean.h1.sub.pla.dplot <- subset(tidy(nlights_calib_mean.h1.sub.pla),
                                              term == "rtb.placebo")
nlights_calib_mean.h1.sub.pla.dplot$Group <- "Placebo presence"

plot.nlights_calib_mean.h1.sub <- rbind(
  nlights_calib_mean.h1.sub.dplot,
  nlights_calib_mean.h1.sub.pla.dplot
)

@

<<NightLightsCModelsH2, eval=TRUE, echo = FALSE, tidy=TRUE, fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE>>=

## H2 treatment specification for the nighttime-lights outcome, full data
## (geddata). Compared with the H1 formula it adds the rtb_rtb.other term
## right after rtb.other; term order is otherwise unchanged.
nlights_calib_mean.h2.full <- lm_robust(
  formula = nlights_calib_mean ~
    rtb + rtb.other + rtb_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddata
)

#summary(nlights_calib_mean.h2.full)

## H2 placebo specification for the nighttime-lights outcome, full placebo
## data (plagedrtb): rtb.placebo and rtb.placebo_rtb.other stand where the
## treatment model has rtb and rtb_rtb.other.
nlights_calib_mean.h2.full.pla <- lm_robust(
  formula = nlights_calib_mean ~
    rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtb
)

#summary(nlights_calib_mean.h2.full.pla)


## H2 treatment specification for the nighttime-lights outcome, subset data
## (geddatasub). This fit is the first column of the H3 regression table, so
## the term order below must not change.
nlights_calib_mean.h2.sub <- lm_robust(
  formula = nlights_calib_mean ~
    rtb + rtb.other + rtb_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = geddatasub
)

#summary(nlights_calib_mean.h2.sub)

## H2 placebo specification for the nighttime-lights outcome, subset placebo
## data (plagedrtbsub). This fit is the second column of the H3 regression
## table, so the term order below must not change.
nlights_calib_mean.h2.sub.pla <- lm_robust(
  formula = nlights_calib_mean ~
    rtb.placebo + rtb.other + rtb.placebo_rtb.other +
    nlights_calib_mean_1 + nlights_mean_neighbor +
    attack_neighbors_sum +
    log_pop_1 + gcp_ppp_1 +
    # excluded_mean remains switched off, as in the earlier version
    STD + SQKM_ADMIN +
    log_bdist2 + log_capdist +
    idpb +
    as.factor(Country) + as.factor(year),
  data = plagedrtbsub
)

#summary(nlights_calib_mean.h2.sub.pla)


# Build the plotting data for the H2 nighttime-lights models.
# pred.lm.int.r() is defined elsewhere in the file; what it returns is not
# visible here (presumably a data frame of estimates -- TODO confirm).
# The four helper calls run in the same sequence as before
# (full, full placebo, subset, subset placebo).

# Full data
nlights_calib_mean.h2.full.dplot <- pred.lm.int.r(nlights_calib_mean.h2.full)
nlights_calib_mean.h2.full.dplot[["Group"]] <- "Actual presence"

nlights_calib_mean.h2.full.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.full.pla)
nlights_calib_mean.h2.full.pla.dplot[["Group"]] <- "Placebo presence"

# Stack treatment and placebo rows for ggplot
plot.nlights_calib_mean.h2.full <- rbind(
  nlights_calib_mean.h2.full.dplot,
  nlights_calib_mean.h2.full.pla.dplot
)

# Subset data
nlights_calib_mean.h2.sub.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub)
nlights_calib_mean.h2.sub.dplot[["Group"]] <- "Actual presence"

nlights_calib_mean.h2.sub.pla.dplot <- pred.lm.int.r(nlights_calib_mean.h2.sub.pla)
nlights_calib_mean.h2.sub.pla.dplot[["Group"]] <- "Placebo presence"

# Stack treatment and placebo rows for ggplot
plot.nlights_calib_mean.h2.sub <- rbind(
  nlights_calib_mean.h2.sub.dplot,
  nlights_calib_mean.h2.sub.pla.dplot
)

@

<<RegTabMainModelsH3, eval = TRUE, echo = FALSE, tidy=TRUE, fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# Emit the H3 regression table (subset treatment fit and its placebo) as
# LaTeX. The 16 coefficient labels follow the order in which the terms first
# appear across the two models once the Country/year terms are omitted;
# reorder.coef then rearranges those 16 labels by position.
print(
  texreg(
    l = list(nlights_calib_mean.h2.sub, nlights_calib_mean.h2.sub.pla),
    custom.model.names = c("Nighttime Lights", "Nighttime Lights (p)"),
    custom.coef.names = c(
      "Intercept",
      "Refugee Presence",
      "Refugee Presence in Other Provs",
      "Ref Pres x Ref Pres in Other Provs",
      "Lagged Nighttime Lights",
      "Ave Nighttime Lights in Neighbor Provs",
      "Sum Violent Events in Neighbor Provs",
      "Lagged Population (logged)",
      "Lagged GDP",
      "Terrain Ruggedness",
      "Province Size (sqkm)",
      "Distance from border (km logged)",
      "Distance from capital (km logged)",
      "IDP Presence",
      "Placebo Refugee Presence",
      "Placebo Ref Pres x Ref Pres in Other Provs"
    ),
    omit.coef = "Country|year",
    reorder.coef = c(2, 15, 3, 4, 16, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1),
    # goodness-of-fit rows requested from texreg
    include.ci = FALSE,
    include.aic = TRUE,
    include.bic = FALSE,
    include.loglik = TRUE,
    include.deviance = FALSE,
    include.rmse = FALSE,
    # float/LaTeX settings
    caption = "Regression table for H3: Effects of Geographically Concentrated (coefficient for Refugee Presence) or Dispersed (coefficient for Refugee Presence x Other Prov Refugee Presence) Presence on Nighttime Lights and their respective Placebo models (p). All models include control variables and Country and Year fixed effects.",
    label = "tab:MainModelsH3",
    scalebox = "0.8",
    use.packages = FALSE,
    float.pos = "H"
  )
)

@

\newpage
\subsection{H1 Models Replacing Refugee Presence (binary) Variable with Refugee Population and Ratio of Refugee to Local Population}
\label{SIsec:regpopulation}

For the 2010--2015 Africa subset data, Models 1 and 4 show the effects of logged Refugee Population on Number of Violent Events and logged Battle Deaths, respectively. Models 2 and 5 show the effects of the ratio of Refugee Population to Local Population in the Province-Year on the same conflict outcomes. Models 3 and 6 show our main H1 models using Refugee Presence as the independent variable of interest; we include these to make sure that results are comparable, since we are using a subset of the data covering African countries from 2010 to 2015.

There are null effects for all three refugee-related independent variables. There does, however, appear to be a negative (though not statistically significant) effect of a greater ratio of refugees to locals on these conflict outcomes.

<<RegTabMainModelsH1_pop, eval = TRUE, echo = FALSE, tidy=TRUE, fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

## H1 robustness check on the Africa subset (data frame `gedpopafrica`).
## Six lm_robust() models are fitted: two outcomes (violent events `attack`,
## logged battle deaths `log(best+1)`) crossed with three refugee measures
## (binary `rtb`, logged refugee population `log(Total+1)`, and the
## refugee-to-local ratio `RefPop_Ratio`). All models share the same controls
## plus Country and year fixed effects, except that the two ratio models leave
## out log_pop_1 (it is commented out in their formulas).
## NOTE(review): presumably because local population already enters the
## ratio -- confirm.

## Outcome 1: number of violent events

# Main H1 specification with binary refugee presence (table column 3)
attack.h1.subaf <- lm_robust(attack ~ rtb + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(attack.h1.subaf)

# Same model with logged refugee population (+1 before logging) replacing
# the binary indicator (table column 1)
attack.pop.h1.subaf <- lm_robust(attack ~ log(Total+1) + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(attack.pop.h1.subaf)


## Same model with ratio of ref population / local population (per reviewer comment);
## table column 2. log_pop_1 is deliberately left out of this specification.
attack.popratio.h1.subaf <- lm_robust(attack ~ RefPop_Ratio + 
                       rtb.other + 
                       attack_1 +
                       attack_neighbors_sum +
                       #log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(attack.popratio.h1.subaf)

## Outcome 2: logged battle deaths, log(best+1). The lagged outcome
## log(best_1+1) replaces attack_1; the neighbor control remains
## attack_neighbors_sum, as in the event models.

# Main H1 specification with binary refugee presence (table column 6)
best.h1.subaf <- lm_robust(log(best+1) ~ rtb + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(best.h1.subaf)

# Same model with logged refugee population (table column 4)
best.pop.h1.subaf <- lm_robust(log(best+1) ~ log(Total+1) + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(best.pop.h1.subaf)


## Same model with ratio of ref population / local population (per reviewer comment);
## table column 5. log_pop_1 is deliberately left out of this specification.
best.popratio.h1.subaf <- lm_robust(log(best+1) ~ RefPop_Ratio + 
                       rtb.other + 
                       log(best_1+1) +
                       attack_neighbors_sum +
                       #log_pop_1 + 
                       gcp_ppp_1 + 
                       #excluded_mean +
                       STD + 
                       SQKM_ADMIN +
                       log_bdist2 + 
                       #log(bdist3) +
                       log_capdist +
                       idpb + 
                       as.factor(Country) + 
                       as.factor(year),  
               data = gedpopafrica)

#summary(best.popratio.h1.subaf)

# Emit the six-column table as raw LaTeX (results='asis'). Columns 1-3 are the
# event models and 4-6 the death models, each ordered population / ratio /
# binary presence.
# custom.coef.names is positional: labels 1-12 follow the term order of the
# first model in the list, and labels 13-15 name the terms that only enter in
# later models (ratio, binary presence, lagged deaths). reorder.coef then
# moves the three refugee measures (2, 13, 14) to the top, places "Lagged
# Deaths" (15) right after "Lagged Events" (4), and puts the intercept (1)
# last.
# NOTE(review): any change to a formula's term order or to the model order
# in list() silently mislabels rows -- re-check the labels after such edits.
print(texreg(list(attack.pop.h1.subaf,
                  attack.popratio.h1.subaf,
                  attack.h1.subaf,
                  best.pop.h1.subaf,
                  best.popratio.h1.subaf,
                  best.h1.subaf),
          custom.model.names = c("Events", "Events", "Events",
                                 "Deaths", "Deaths", "Deaths"),
          omit.coef='Country|year',
          include.ci = FALSE,
          include.aic = TRUE, 
          include.bic = FALSE,
          include.loglik = TRUE, 
          include.deviance = FALSE, 
          include.rmse = FALSE,
          custom.coef.names=c("Intercept", 
                              "Refugee Population (logged)",
                              "Refugee Presence in Other Provs",
                              "Lagged Events",
                              "Sum Events in Neighbor Provs",
                              "Lagged Population (logged)",
                              "Lagged GDP",
                              "Terrain Ruggedness",
                              "Province Size (sqkm)",
                              "Distance from border (km logged)",
                              "Distance from capital (km logged)",
                              "IDP Presence",
                              "Ratio Refugee--Local Population",
                              "Refugee Presence (binary)",
                              "Lagged Deaths"),
          reorder.coef=c(2, 13, 14, 3, 4, 15, 5, 6, 7, 8, 9, 10, 11, 12, 1),
          caption = "Regression table for Effects of Refugee Population on Conflict Outcomes in Africa (2010--2015). All models include control variables and Country and Year fixed effects.",
          label = "tab:MainModels_pop_H1",
          scalebox='0.85',
          use.packages = FALSE,
          float.pos = "H"))

@



%%%% S&G Extension
\clearpage
\newpage
\section{\cite{Salehyan:2006} Extension Exercise}
\label{SIsec:SGextension}

Our study at the subnational level shows a null effect of refugee presence on conflict outcomes, and in cases when refugees are geographically concentrated in the host country, substantively large negative effects. These findings appear to run counter to the existing seminal quantitative research of \cite{Salehyan:2006}, who find that increased refugee populations from neighboring countries are correlated with increased likelihood of conflict onset. However, our study covers the period from 1990 to 2018, while their data are from the period from 1951 to 2001. To test whether the results of our analysis generally obtain if we carry out the analysis following the country-level approach that \cite{Salehyan:2006} take, we replicate their study for the period 2002 through 2013.\footnote{2013 was the last year for which we were able to obtain country-year data for all variables included in their model.}

<<SGextension, eval = TRUE, echo=FALSE, results='hide', message=FALSE>>=

# Load and prepare the two country-year datasets used by the Salehyan &
# Gleditsch (S&G) replication and extension models:
#   SGoriginal   - S&G's replication data (object `x` inside IOrepdata.RData)
#   SGextension  - extension data, with columns renamed to S&G's variable
#                  names and peace-year spline terms added
#   SGextension2 - SGextension restricted to years after 2001

library(plyr)
library(splines)

# Input files are addressed relative to the document directory rather than
# via setwd(): knitr restores the working directory after every chunk and
# warns when a chunk changes it, so explicit paths are the robust choice.

# IO Replication data for S&G's original article (loads a data frame `x`):
load(file.path("Paper_Inputs", "IOrepdata.RData"))

SGoriginal <- x
x <- NULL

# Post-Cold War replication dataset:
SGextension <- read.csv(file.path("Paper_Inputs", "S_G_Extension_mod.csv"),
                        header = TRUE)

# Create a common set of names (old name = S&G name) so that the same model
# formulas and table labels can be used for both datasets. Columns that are
# not present are simply left alone.
sg_rename <- c(onset                  = "nonset",
               refugee_population_log = "lrefpct",
               civilwarneighbor       = "nbcwbin",
               polity2                = "polityb",
               polity2_squared        = "polityb2",
               l_gdp_pc               = "lngdp",
               l_population           = "lnpop",
               size                   = "het",
               peaceyrs_mod           = "peace1")
to_rename <- colnames(SGextension) %in% names(sg_rename)
colnames(SGextension)[to_rename] <-
  unname(sg_rename[colnames(SGextension)[to_rename]])

# Spline terms for peace years in the extension data. ns() with df = 4 returns
# a four-column natural cubic spline basis; only the first three columns are
# kept, under the names S&G's models use (s1a, s2a, s3a).
extensionsplines <- ns(SGextension$peace1, df = 4)
SGextension$s1a <- extensionsplines[, 1]
SGextension$s2a <- extensionsplines[, 2]
SGextension$s3a <- extensionsplines[, 3]

# Reduce the date column to a four-digit year (stored as character, as before).
SGextension$Year <- format(as.Date(SGextension$Year), "%Y")

# Contemporary subset (2002 onwards). The year is compared as an integer:
# comparing the character column with a number would fall back to a string
# comparison.
SGextension2 <- SGextension[as.integer(SGextension$Year) > 2001, ]

@

<<SGextension1, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# S&G "contemporary comparison": the same logit specification of conflict
# onset is estimated on S&G's original data (SGoriginal) and on the 2002+
# extension data (SGextension2), each with and without the Ethnic Kin
# indicator (ethnicnb). The four models are typeset with stargazer and
# written out as raw LaTeX (results='asis').
# NOTE(review): the table note below gives the original period as
# 1946 - 2001, whereas the accompanying text says 1951 to 2001 -- confirm
# which is intended before changing either.

# Step 1: original S&G data, with and without Ethnic Kin
r1 <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    ethnicnb + 
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a, 
                  family = binomial(link = 'logit'), 
                  data = SGoriginal)

r1.wo_ethnicnb <- glm(nonset ~ lrefpct + 
                                nbcwbin + 
                                polityb + 
                                polityb2 + 
                                lngdp + 
                                lnpop + 
                                het + 
                                peace1 + 
                                s1a + 
                                s2a + 
                                s3a, 
                              family = binomial(link = 'logit'), 
                              data = SGoriginal)

# Step 2: contemporary period (2002 - 2013), same two specifications
r2 <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    ethnicnb + 
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a,
                  family = binomial(link = 'logit'), 
                  data = SGextension2)

r2.wo_ethnicnb <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a,
                  family = binomial(link = 'logit'), 
                  data = SGextension2)

# Variant without Ethnic Kin and without the spline terms. It is fitted for
# reference only and is not part of the table produced below.
r2.wo_ethnicnb.wosplines <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    peace1, 
                  family = binomial(link = 'logit'), 
                  data = SGextension2)

# Step 3: produce the table. stargazer() prints its LaTeX, so the output is
# captured as a character vector (one element per line) for post-processing.
# The result is not called `table`, to avoid shadowing base::table().
sg_table1 <- capture.output({
  stargazer(r1, r1.wo_ethnicnb, 
            r2, r2.wo_ethnicnb, 
          title = "SG Contemporary Comparison: Logistic regression results of Conflict Onset on Refugee Population (Logged)",
          star.cutoffs = c(.1, .05, .01),
          single.row = TRUE,
          initial.zero = FALSE,
          model.names = FALSE,
          model.numbers = FALSE,
          header = FALSE,
          digits = 2,
          digits.extra = 0,
          df = FALSE,
          dep.var.labels = "Outcome: Civil War Onset",
          column.labels   = c("Original M1",
                              "Original M2",
                              "Extension M1",
                              "Extension M2"),
          covariate.labels = c("Refugees (logged)",
                               "Civil War in Neighbor",
                               "Polity",
                               "Polity Squared",
                               "GDP (logged)",
                               "Population (logged)",
                               "Ethnic Heterogeneity",
                               "Ethnic Kin",
                               "Peace Years",
                               "Spline 1",
                               "Spline 2",
                               "Spline 3"),
          label="tab:SGextension1"
          )
})

# Wrap the tabular in \resizebox so the table is scaled to the text width.
sg_table1 <- gsub("\\begin{tabular}","\\resizebox{1\\textwidth}{!}{\\begin{tabular}", sg_table1, fixed = TRUE)

# Close the \resizebox group and append an unnumbered explanatory note.
sg_table1 <- gsub("\\end{tabular}","\\end{tabular}} \\caption*{This table shows the regression results of the \\cite{Salehyan:2006} reanalysis and extension exercise comparing their analysis during the period of their study (1946 - 2001) versus contemporary period (2002 - 2013). The first and second columns replicate \\cite{Salehyan:2006} using their data. The third and fourth columns show results for the same models in the contemporary period using our extended data. This table shows that in the contemporary period (although there are fewer observations because of the truncated time period), the coefficient for refugees becomes negative, although it is not statistically significant.}", sg_table1, fixed = TRUE)

# Write one LaTeX source line per captured line. cat()'s default separator is
# a single space, which would put the whole table on one line of the .tex file.
cat(sg_table1, sep = "\n")

@

Before describing the results of this exercise, we briefly detail our replication and extension approach. We extend the authors' analysis to the more contemporary period up to 2013. We construct a panel dataset that matches the original on all variables. For conflict onset, the outcome of interest, we use the same conflict dataset that these scholars used and apply the same binary coding to the ``main dependent variable [of] conflict onset, which is coded 1 for the first year of a conflict and 0 if no conflict takes place in the state in that particular year \citep{gleditsch2002armed}. Subsequent ongoing years of the same conflict are dropped from the estimation sample'' \citep[p. 350]{Salehyan:2006}. We also follow these authors in their treatment of cases of conflict onset that occurred during the years when another conflict was ongoing---``In cases where there were multiple conflict onsets in a country, data on a new onset was included if it occurred during the years when another conflict was ongoing'' \citep[p. 350]{Salehyan:2006}.
For their primary explanatory variable, refugee population, we follow these authors in using country-level data with refugee population values developed by the Population Data Unit of the Office of the United Nations High Commissioner for Refugees (UNHCR). Using the country distance measures developed by the authors for their analysis, we similarly use their ``restrictive definition [of qualifying refugee sending states] in which neighbors are defined by borders falling within a distance of 100 kilometers or less (including contiguity)'' \citep[p. 351]{Salehyan:2006}. Like the authors, we also ``take the natural log of the number of refugees after adding one to the base... and include a measure weighting the total number of refugees from neighboring states relative to the size of the host countries'' \citep[p. 351]{Salehyan:2006}.

With respect to control variables, we include all variables used by the authors. Our sources of data differ only when the datasets used by \cite{Salehyan:2006} do not cover the additional years 2002--2013. Polity scores\footnote{Although we use the same data source, the most current version of that data has been modified since \cite{Salehyan:2006} used it in their study. In their analysis, they convert ``special transition codes'' to 0. Since then, however, \cite{marshall2002polity} has taken steps to deal more carefully with these codes. We, therefore, use the ``Polity2'' variable, which ``modifies the combined annual POLITY score by applying a simple treatment, or `fix,' to convert instances of `standardized authority scores' (i.e., $-66$, $-77$, and $-88$) to conventional polity scores\dots'' \citep[p. 8]{polity2015}.}, indicators for the presence of transborder ethnic groups\footnote{Given the number of missing observations on this variable, we follow \cite{Salehyan:2006} in estimating the models with and without this variable.} and, separately, of civil war in neighboring countries, and country distances are extracted from the same data sources and processed as described by \cite{Salehyan:2006} in their paper. For gross domestic product, we use World Bank data \citep{links2017world} because the data that the authors provide does not cover as many contemporary years. Similarly, we use World Bank data for population data.\footnote{This may or may not be the source of population data used by \cite{Salehyan:2006}. The source is not specified in their manuscript.} We calculate size of the largest ethnic group using ethnic power relations data \citep{wimmer2009ethnic} because the source of ethnicity data used by \cite{Salehyan:2006} is not available for our study period. We also use the R package \texttt{splines} to create a natural cubic spline with three interior knots for peace years.

<<SGextension2, eval = TRUE, echo = FALSE, tidy=TRUE, fig.pos = 'H', fig.width = 7, fig.height = 4, out.width= ".9\\linewidth", fig.align='center', warning=FALSE, message=FALSE, strip.white=TRUE, results='asis'>>=

# Cold War versus post-Cold War comparison: the S&G logit specification of
# conflict onset is estimated on the 1951-1989 rows of S&G's original data
# (SGoriginal2) and on the full extension data (SGextension), each with and
# without the Ethnic Kin indicator (ethnicnb). The four models are typeset
# with stargazer and written out as raw LaTeX (results='asis').

# Step 1: Cold War subset of the original data (1951 <= year < 1990)
SGoriginal2 <- SGoriginal[SGoriginal$year < 1990 & SGoriginal$year >= 1951, ]

# Step 2: Cold War regressions, with and without Ethnic Kin
r1.coldwar <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    ethnicnb + 
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a, 
                  family = binomial(link = 'logit'), 
                  data = SGoriginal2)

r1.coldwar.wo_ethnicnb <- glm(nonset ~ lrefpct + 
                                nbcwbin + 
                                polityb + 
                                polityb2 + 
                                lngdp + 
                                lnpop + 
                                het + 
                                peace1 + 
                                s1a + 
                                s2a + 
                                s3a, 
                              family = binomial(link = 'logit'), 
                              data = SGoriginal2)


# Step 3: post-Cold War regressions on the full extension data
r2.postcoldwar <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    ethnicnb + 
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a,
                  family = binomial(link = 'logit'), 
                  data = SGextension)

r2.postcoldwar.wo_ethnicnb <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    peace1 + 
                    s1a + 
                    s2a + 
                    s3a,
                  family = binomial(link = 'logit'), 
                  data = SGextension)

# Variant without Ethnic Kin and without the spline terms. It is fitted for
# reference only and is not part of the table produced below.
r2.postcoldwar.wo_ethnicnb.wosplines <- glm(nonset ~ lrefpct + 
                    nbcwbin + 
                    polityb + 
                    polityb2 + 
                    lngdp + 
                    lnpop + 
                    het +
                    peace1, 
                  family = binomial(link = 'logit'), 
                  data = SGextension)

# Step 4: produce the table. stargazer() prints its LaTeX, so the output is
# captured as a character vector (one element per line) for post-processing.
# The result is not called `table`, to avoid shadowing base::table().
sg_table2 <- capture.output({
  stargazer(r1.coldwar, r1.coldwar.wo_ethnicnb, 
            r2.postcoldwar, r2.postcoldwar.wo_ethnicnb, 
          title = "Logistic regression results of Conflict Onset on Refugee Population (Logged)",
          star.cutoffs = c(.1, .05, .01),
          single.row = TRUE,
          initial.zero = FALSE,
          model.names = FALSE,
          model.numbers = FALSE,
          header = FALSE,
          digits = 2,
          digits.extra = 0,
          df = FALSE,
          dep.var.labels = "Outcome: Civil War Onset",
          column.labels   = c("Cold War M1",
                              "Cold War M2",
                              "Post-Cold War M1",
                              "Post-Cold War M2"),
          covariate.labels = c("Refugees (logged)",
                               "Civil War in Neighbor",
                               "Polity",
                               "Polity Squared",
                               "GDP (logged)",
                               "Population (logged)",
                               "Ethnic Heterogeneity",
                               "Ethnic Kin",
                               "Peace Years",
                               "Spline 1",
                               "Spline 2",
                               "Spline 3"),
          label="tab:SGextension2"
          )
})

# Wrap the tabular in \resizebox so the table is scaled to the text width.
sg_table2 <- gsub("\\begin{tabular}","\\resizebox{1\\textwidth}{!}{\\begin{tabular}", sg_table2, fixed = TRUE)

# Close the \resizebox group and append an unnumbered explanatory note.
sg_table2 <- gsub("\\end{tabular}","\\end{tabular}} \\caption*{This table shows the regression results of the \\cite{Salehyan:2006} reanalysis and extension exercise comparing their analysis during the Cold War and post-Cold War periods. The first and second columns show Cold War results with and without Ethnic Kin, respectively, replicating \\cite{Salehyan:2006} using their data from 1951 - 1989, excluding 1990 and onwards. The third and fourth columns show post-Cold War results with Ethnic Kin and without Ethnic Kin, respectively, during the post-Cold War period for 1990 - 2013. This table shows that the positive effect of refugees on Conflict Onset in \\cite{Salehyan:2006} appears to be driven by (country-year) observations during the Cold War period. When their analysis is carried out for the current post-Cold War period, the coefficient for refugees becomes negative, although it is not statistically significant.}", sg_table2, fixed = TRUE)

# Write one LaTeX source line per captured line. cat()'s default separator is
# a single space, which would put the whole table on one line of the .tex file.
cat(sg_table2, sep = "\n")

@

Results of this analysis are consistent with our sub-national analysis, as shown in Table~\ref{tab:SGextension1}, potentially suggesting that conflict dynamics differ between \cite{Salehyan:2006}'s study period and the more contemporary period. While an analysis of the differences between those periods is beyond the scope of this paper, we offer a few thoughts and some exploratory analysis to guide potential future research into heterogeneous treatment effects of refugee communities across time. Noting that one difference between \cite{Salehyan:2006}'s study period and ours is that their study captures many Cold War years, we carry out an additional analysis whereby we separately replicate their study for the Cold War and post-Cold War periods.\footnote{For the Cold War analysis, this simply involves replicating their analysis using the subset of their original dataset covering only the Cold War period (1951--1989). For the post-Cold War analysis, we extend the replication carried out above to run from 1990 through 2013.}\footnote{Possible differences between Cold War and post-Cold War dynamics that might affect the relationship between refugees and conflict emerged during discussions with UNHCR officials. One noted, for instance, that ``[t]he Cold War period was a very distinct period because in Africa, there were a lot of freedom fighters.'' (Senior Official at Kakuma Refugee Camp, interview conducted on July 23, 2018). Another mentioned that ``[t]here's been quite an evolution since the 1980s, refugees camps in Pakistan were used as military bases. Cambodian camps were used as bases to oppose Cambodian occupation. Since that time, that form of politically motivated policy is no longer standard. UNHCR in particular has insisted as much as possible to maintain the civilian characteristic of these camps.'' (Senior Official at Policy Development and Evaluation, interview conducted on July 6, 2018).}

Table~\ref{tab:SGextension2} shows the regression results of the second analysis. Comparing columns 1 and 2, which cover the Cold War period, with columns 3 and 4, which cover the post-Cold War period, we see that the positive relationship between refugees and conflict onset in \cite{Salehyan:2006} is confined to the Cold War period. The coefficient of (logged) refugee population from neighboring countries remains positive when the analysis is carried out using only Cold War years. For the post-Cold War period, the estimated coefficient is negative. Yet, neither set of coefficients is statistically significant. Because \cite{Salehyan:2006}'s coefficient loses statistical significance when their sample is truncated to the Cold War years, this may suggest that, rather than Cold War dynamics accounting (at least entirely) for the positive refugee--conflict relationship that they observe, there may be something specifically related to refugee outflows in the 1990s (perhaps the various displacement dynamics that have been documented as the Soviet Union collapsed and regional conflict intensified).

\newpage

\setstretch{1}
\bibliography{refugeeconflict}

\end{document}
